From 5e7c4d4ec10855ebe8f19af8a80fdfa26caad5e9 Mon Sep 17 00:00:00 2001 From: Yi-Yen Chung <45251297+yyctw@users.noreply.github.com> Date: Wed, 18 Oct 2023 10:30:08 +0800 Subject: [PATCH] NEON: more fp16 using intrinsics supported by architecture v7 (skip version) (#1081) * [NEON] Add vabal_{s/u}{8/16/32} * [NEON] Add vabal_high_{s/u}{8/16/32} * [NEON] Add all vcale* intrinsics (9) * [NEON] Add all vcalt intrinsics (9) * [NEON] Add vcreate_f16 * [NEON] Add vreinterpret_u64_f16 * [NEON] Add vcvth_f16_s16 and vcvth_f16_u16 * [NEON] Add vduph_lane_f16, vdup_lane_f16, and vdupq_lane_f16 * [NEON] Add vext_f16 * [NEON] Add 16 vcvt{q}_n_* intrinsics * [Fix] Correct function input parameters * [NEON] Add 6 vcvtn_{s/u}{16/32/64}_f{*} intrinsics * [Fix] Correct vdup_lane_f16 and vdupq_lane_f16. * [Fix] Correct function input parameters. * [NEON] Add 24 vcvt{q}_n_* intrinsics * [NEON] Add all vcvtn* intrinsics * [NEON] Add vfmah_f16 and vfma_f16 * [NEON] Add vfma_n_f16 and vfmaq_n_f16 * [NEON] Add vmulh_f16 * [NEON] Add fma_lane related intrinsics. * [NEON] Add 5 vmul* related intrinsics vmulh_lane_f16, vmulh_laneq_f16, vmul_lane_f16, vmul_laneq_f16, vmulq_laneq_f16. * [NEON] Add neg related intrinsics. * [NEON] Add all fms, fms_n, and fms_lane intrinsics * [NEON] Add types float16x{4/8}x{2/3/4} * [NEON] Add 9 vld1 related intrinsics * [Fix] Modified wrong rounding implementation. Modified wrong implementation "Ties to Away" to "rounding to nearest with ties to Away" add.h: Remove redundant code. * [Fix] Fix wrong intrinsic alias names. * [Refactor] Remove redundant functions. * [NEON] Add 45 ld2 related intrinsics one ld2_f16, twenty-two ld2_lane series, and twenty-two ld2_dup series. * [NEON] Add ld3_dup, ld3_lane, and ld4_dup * [NEON] Add vld3_f16 and vld4_f16. * [NEON] Add vld{3/4}_{dup/lane} series intrinsics * [NEON] Add mla_{high}_lane series intrinsics * [NEON] Add qdmlal_{high}_{lane} series intrinsics. 
* [NEON] Add qdmlal_lane and qdmlal_n series intrinsics * [NEON] Add mls_lane and mlsl_high_lane series intrinsics * [NEON] Add 22 qdmlsl series intrinsics * [NEON] Add 10 qdmull_* series intrinsics * [NEON] Add 3 qdmulh series intrinsics * [Fix] Fix wrong function name. * [Fix] Correct the wrong alias function name. * [NEON] Add qdmullh_lane{q}_s{16/32} related intrinsics * [NEON] Add qdmull_n and qdmull_high_lane series intrinsics * [Fix] Add conditions for fp16 intrinsics * [Hack] Skip functions that trigger compiler bugs. --- meson.build | 34 + simde/arm/neon.h | 34 + simde/arm/neon/abal.h | 125 ++ simde/arm/neon/abal_high.h | 125 ++ simde/arm/neon/cale.h | 165 +++ simde/arm/neon/calt.h | 165 +++ simde/arm/neon/create.h | 19 +- simde/arm/neon/cvt.h | 38 +- simde/arm/neon/cvt_n.h | 579 +++++++++ simde/arm/neon/cvtn.h | 380 +++++- simde/arm/neon/dup_lane.h | 41 + simde/arm/neon/ext.h | 26 + simde/arm/neon/fma.h | 28 + simde/arm/neon/fma_lane.h | 92 ++ simde/arm/neon/fma_n.h | 29 + simde/arm/neon/fms.h | 139 +++ simde/arm/neon/fms_lane.h | 316 +++++ simde/arm/neon/fms_n.h | 125 ++ simde/arm/neon/get_lane.h | 2 +- simde/arm/neon/ld1_dup.h | 14 + simde/arm/neon/ld1_lane.h | 33 + simde/arm/neon/ld1_x2.h | 24 + simde/arm/neon/ld1_x3.h | 25 + simde/arm/neon/ld1_x4.h | 26 + simde/arm/neon/ld1q_x2.h | 25 + simde/arm/neon/ld1q_x3.h | 25 + simde/arm/neon/ld1q_x4.h | 26 + simde/arm/neon/ld2.h | 27 + simde/arm/neon/ld2_dup.h | 458 +++++++ simde/arm/neon/ld2_lane.h | 478 ++++++++ simde/arm/neon/ld3.h | 29 + simde/arm/neon/ld3_dup.h | 458 +++++++ simde/arm/neon/ld3_lane.h | 478 ++++++++ simde/arm/neon/ld4.h | 21 + simde/arm/neon/ld4_dup.h | 458 +++++++ simde/arm/neon/ld4_lane.h | 56 + simde/arm/neon/mla_lane.h | 103 ++ simde/arm/neon/mlal_high_lane.h | 147 +++ simde/arm/neon/mls_lane.h | 240 ++++ simde/arm/neon/mlsl_high_lane.h | 147 +++ simde/arm/neon/mul_lane.h | 113 ++ simde/arm/neon/neg.h | 61 + simde/arm/neon/qdmlal.h | 110 ++ simde/arm/neon/qdmlal_high.h | 83 ++ 
simde/arm/neon/qdmlal_high_lane.h | 125 ++ simde/arm/neon/qdmlal_high_n.h | 86 ++ simde/arm/neon/qdmlal_lane.h | 122 ++ simde/arm/neon/qdmlal_n.h | 69 ++ simde/arm/neon/qdmlsl.h | 110 ++ simde/arm/neon/qdmlsl_high.h | 81 ++ simde/arm/neon/qdmlsl_high_lane.h | 124 ++ simde/arm/neon/qdmlsl_high_n.h | 86 ++ simde/arm/neon/qdmlsl_lane.h | 122 ++ simde/arm/neon/qdmlsl_n.h | 69 ++ simde/arm/neon/qdmulh.h | 16 + simde/arm/neon/qdmulh_lane.h | 23 + simde/arm/neon/qdmull.h | 5 +- simde/arm/neon/qdmull_high.h | 69 ++ simde/arm/neon/qdmull_high_lane.h | 107 ++ simde/arm/neon/qdmull_high_n.h | 70 ++ simde/arm/neon/qdmull_lane.h | 206 ++++ simde/arm/neon/qdmull_n.h | 69 ++ simde/arm/neon/qshl.h | 3 +- simde/arm/neon/reinterpret.h | 17 + simde/arm/neon/sqrt.h | 2 +- simde/arm/neon/types.h | 30 +- test/arm/neon/abal.c | 392 ++++++ test/arm/neon/abal_high.c | 432 +++++++ test/arm/neon/add_testgen.py | 114 ++ test/arm/neon/cale.c | 518 ++++++++ test/arm/neon/calt.c | 521 ++++++++ test/arm/neon/create.c | 34 + test/arm/neon/cvt.c | 264 +++-- test/arm/neon/cvt_n.c | 1045 ++++++++++++++++ test/arm/neon/cvtn.c | 627 +++++++++- test/arm/neon/dup_lane.c | 101 ++ test/arm/neon/ext.c | 64 + test/arm/neon/fma.c | 123 +- test/arm/neon/fma_lane.c | 585 +++++++++ test/arm/neon/fma_n.c | 121 ++ test/arm/neon/fms.c | 322 +++++ test/arm/neon/fms_lane.c | 1213 +++++++++++++++++++ test/arm/neon/fms_n.c | 278 +++++ test/arm/neon/ld1_dup.c | 35 + test/arm/neon/ld1_lane.c | 100 ++ test/arm/neon/ld1_x2.c | 41 + test/arm/neon/ld1_x3.c | 52 + test/arm/neon/ld1_x4.c | 63 + test/arm/neon/ld1q_x2.c | 61 + test/arm/neon/ld1q_x3.c | 82 ++ test/arm/neon/ld1q_x4.c | 103 ++ test/arm/neon/ld2.c | 54 + test/arm/neon/ld2_dup.c | 1315 ++++++++++++++++++++ test/arm/neon/ld2_lane.c | 1449 ++++++++++++++++++++++ test/arm/neon/ld3.c | 1678 ++++++++++++++++++++++++++ test/arm/neon/ld3_dup.c | 1569 ++++++++++++++++++++++++ test/arm/neon/ld3_lane.c | 1777 +++++++++++++++++++++++++++ test/arm/neon/ld4.c | 88 ++ 
test/arm/neon/ld4_dup.c | 1846 +++++++++++++++++++++++++++++ test/arm/neon/ld4_lane.c | 232 ++++ test/arm/neon/mla_lane.c | 815 +++++++++++++ test/arm/neon/mlal_high_lane.c | 579 +++++++++ test/arm/neon/mls_lane.c | 1576 ++++++++++++++++++++++++ test/arm/neon/mlsl_high_lane.c | 579 +++++++++ test/arm/neon/modify_c.txt | 83 ++ test/arm/neon/mul.c | 43 + test/arm/neon/mul_lane.c | 260 ++++ test/arm/neon/neg.c | 120 ++ test/arm/neon/qdmlal.c | 224 ++++ test/arm/neon/qdmlal_high.c | 136 +++ test/arm/neon/qdmlal_high_lane.c | 295 +++++ test/arm/neon/qdmlal_high_n.c | 127 ++ test/arm/neon/qdmlal_lane.c | 593 +++++++++ test/arm/neon/qdmlal_n.c | 119 ++ test/arm/neon/qdmlsl.c | 224 ++++ test/arm/neon/qdmlsl_high.c | 136 +++ test/arm/neon/qdmlsl_high_lane.c | 295 +++++ test/arm/neon/qdmlsl_high_n.c | 127 ++ test/arm/neon/qdmlsl_lane.c | 593 +++++++++ test/arm/neon/qdmlsl_n.c | 119 ++ test/arm/neon/qdmulh.c | 43 + test/arm/neon/qdmulh_lane.c | 208 ++++ test/arm/neon/qdmull_high.c | 116 ++ test/arm/neon/qdmull_high_lane.c | 254 ++++ test/arm/neon/qdmull_high_n.c | 108 ++ test/arm/neon/qdmull_lane.c | 696 +++++++++++ test/arm/neon/qdmull_n.c | 100 ++ test/arm/neon/reinterpret.c | 45 +- test/arm/neon/test-neon.h | 138 ++- 129 files changed, 33598 insertions(+), 190 deletions(-) create mode 100644 simde/arm/neon/abal.h create mode 100644 simde/arm/neon/abal_high.h create mode 100644 simde/arm/neon/cale.h create mode 100644 simde/arm/neon/calt.h create mode 100644 simde/arm/neon/cvt_n.h create mode 100644 simde/arm/neon/fms.h create mode 100644 simde/arm/neon/fms_lane.h create mode 100644 simde/arm/neon/fms_n.h create mode 100644 simde/arm/neon/ld2_dup.h create mode 100644 simde/arm/neon/ld2_lane.h create mode 100644 simde/arm/neon/ld3_dup.h create mode 100644 simde/arm/neon/ld3_lane.h create mode 100644 simde/arm/neon/ld4_dup.h create mode 100644 simde/arm/neon/mlal_high_lane.h create mode 100644 simde/arm/neon/mls_lane.h create mode 100644 simde/arm/neon/mlsl_high_lane.h create 
mode 100644 simde/arm/neon/qdmlal.h create mode 100644 simde/arm/neon/qdmlal_high.h create mode 100644 simde/arm/neon/qdmlal_high_lane.h create mode 100644 simde/arm/neon/qdmlal_high_n.h create mode 100644 simde/arm/neon/qdmlal_lane.h create mode 100644 simde/arm/neon/qdmlal_n.h create mode 100644 simde/arm/neon/qdmlsl.h create mode 100644 simde/arm/neon/qdmlsl_high.h create mode 100644 simde/arm/neon/qdmlsl_high_lane.h create mode 100644 simde/arm/neon/qdmlsl_high_n.h create mode 100644 simde/arm/neon/qdmlsl_lane.h create mode 100644 simde/arm/neon/qdmlsl_n.h create mode 100644 simde/arm/neon/qdmull_high.h create mode 100644 simde/arm/neon/qdmull_high_lane.h create mode 100644 simde/arm/neon/qdmull_high_n.h create mode 100644 simde/arm/neon/qdmull_lane.h create mode 100644 simde/arm/neon/qdmull_n.h create mode 100644 test/arm/neon/abal.c create mode 100644 test/arm/neon/abal_high.c create mode 100644 test/arm/neon/add_testgen.py create mode 100644 test/arm/neon/cale.c create mode 100644 test/arm/neon/calt.c create mode 100644 test/arm/neon/cvt_n.c create mode 100644 test/arm/neon/fms.c create mode 100644 test/arm/neon/fms_lane.c create mode 100644 test/arm/neon/fms_n.c create mode 100644 test/arm/neon/ld2_dup.c create mode 100644 test/arm/neon/ld2_lane.c create mode 100644 test/arm/neon/ld3.c create mode 100644 test/arm/neon/ld3_dup.c create mode 100644 test/arm/neon/ld3_lane.c create mode 100644 test/arm/neon/ld4.c create mode 100644 test/arm/neon/ld4_dup.c create mode 100644 test/arm/neon/mlal_high_lane.c create mode 100644 test/arm/neon/mls_lane.c create mode 100644 test/arm/neon/mlsl_high_lane.c create mode 100644 test/arm/neon/modify_c.txt create mode 100644 test/arm/neon/qdmlal.c create mode 100644 test/arm/neon/qdmlal_high.c create mode 100644 test/arm/neon/qdmlal_high_lane.c create mode 100644 test/arm/neon/qdmlal_high_n.c create mode 100644 test/arm/neon/qdmlal_lane.c create mode 100644 test/arm/neon/qdmlal_n.c create mode 100644 test/arm/neon/qdmlsl.c 
create mode 100644 test/arm/neon/qdmlsl_high.c create mode 100644 test/arm/neon/qdmlsl_high_lane.c create mode 100644 test/arm/neon/qdmlsl_high_n.c create mode 100644 test/arm/neon/qdmlsl_lane.c create mode 100644 test/arm/neon/qdmlsl_n.c create mode 100644 test/arm/neon/qdmull_high.c create mode 100644 test/arm/neon/qdmull_high_lane.c create mode 100644 test/arm/neon/qdmull_high_n.c create mode 100644 test/arm/neon/qdmull_lane.c create mode 100644 test/arm/neon/qdmull_n.c diff --git a/meson.build b/meson.build index 152e02d15..a9e325b3c 100644 --- a/meson.build +++ b/meson.build @@ -10,6 +10,8 @@ cxx = meson.get_compiler('cpp') simde_neon_families = [ 'aba', + 'abal', + 'abal_high', 'abd', 'abdl', 'abs', @@ -29,6 +31,8 @@ simde_neon_families = [ 'cadd_rot90', 'cage', 'cagt', + 'cale', + 'calt', 'ceq', 'ceqz', 'cge', @@ -51,6 +55,7 @@ simde_neon_families = [ 'cmla_rot270', 'cnt', 'cvt', + 'cvt_n', 'cvtn', 'combine', 'create', @@ -64,6 +69,9 @@ simde_neon_families = [ 'fma', 'fma_lane', 'fma_n', + 'fms', + 'fms_lane', + 'fms_n', 'get_high', 'get_lane', 'get_low', @@ -79,8 +87,13 @@ simde_neon_families = [ 'ld1q_x4', 'ld1', 'ld2', + 'ld2_dup', + 'ld2_lane', 'ld3', + 'ld3_dup', + 'ld3_lane', 'ld4', + 'ld4_dup', 'ld4_lane', 'max', 'maxnm', @@ -93,16 +106,20 @@ simde_neon_families = [ 'mla_n', 'mlal', 'mlal_high', + 'mlal_high_lane', 'mlal_high_n', 'mlal_lane', 'mlal_n', 'mls', + 'mls_lane', 'mls_n', 'mlsl', 'mlsl_high', + 'mlsl_high_lane', 'mlsl_high_n', 'mlsl_lane', 'mlsl_n', + #'mmlaq', 'movl', 'movl_high', 'movn', @@ -125,10 +142,27 @@ simde_neon_families = [ 'pmin', 'qadd', 'qabs', + 'qdmlal', + 'qdmlal_high', + 'qdmlal_high_lane', + 'qdmlal_high_n', + 'qdmlal_lane', + 'qdmlal_n', + 'qdmlsl', + 'qdmlsl_high', + 'qdmlsl_high_lane', + 'qdmlsl_high_n', + 'qdmlsl_lane', + 'qdmlsl_n', 'qdmulh', 'qdmulh_lane', 'qdmulh_n', 'qdmull', + 'qdmull_high', + 'qdmull_high_lane', + 'qdmull_high_n', + 'qdmull_lane', + 'qdmull_n', 'qrdmulh', 'qrdmulh_lane', 'qrdmulh_n', diff --git 
a/simde/arm/neon.h b/simde/arm/neon.h index eb71abe83..980fc9ccf 100644 --- a/simde/arm/neon.h +++ b/simde/arm/neon.h @@ -31,6 +31,8 @@ #include "neon/types.h" #include "neon/aba.h" +#include "neon/abal.h" +#include "neon/abal_high.h" #include "neon/abd.h" #include "neon/abdl.h" #include "neon/abs.h" @@ -50,6 +52,8 @@ #include "neon/cadd_rot90.h" #include "neon/cage.h" #include "neon/cagt.h" +#include "neon/cale.h" +#include "neon/calt.h" #include "neon/ceq.h" #include "neon/ceqz.h" #include "neon/cge.h" @@ -72,6 +76,7 @@ #include "neon/cmla_rot270.h" #include "neon/cnt.h" #include "neon/cvt.h" +#include "neon/cvt_n.h" #include "neon/cvtn.h" #include "neon/combine.h" #include "neon/create.h" @@ -85,6 +90,9 @@ #include "neon/fma.h" #include "neon/fma_lane.h" #include "neon/fma_n.h" +#include "neon/fms.h" +#include "neon/fms_lane.h" +#include "neon/fms_n.h" #include "neon/get_high.h" #include "neon/get_lane.h" #include "neon/get_low.h" @@ -100,8 +108,13 @@ #include "neon/ld1q_x3.h" #include "neon/ld1q_x4.h" #include "neon/ld2.h" +#include "neon/ld2_dup.h" +#include "neon/ld2_lane.h" #include "neon/ld3.h" +#include "neon/ld3_dup.h" +#include "neon/ld3_lane.h" #include "neon/ld4.h" +#include "neon/ld4_dup.h" #include "neon/ld4_lane.h" #include "neon/max.h" #include "neon/maxnm.h" @@ -114,16 +127,20 @@ #include "neon/mla_n.h" #include "neon/mlal.h" #include "neon/mlal_high.h" +#include "neon/mlal_high_lane.h" #include "neon/mlal_high_n.h" #include "neon/mlal_lane.h" #include "neon/mlal_n.h" #include "neon/mls.h" +#include "neon/mls_lane.h" #include "neon/mls_n.h" #include "neon/mlsl.h" #include "neon/mlsl_high.h" +#include "neon/mlsl_high_lane.h" #include "neon/mlsl_high_n.h" #include "neon/mlsl_lane.h" #include "neon/mlsl_n.h" +//#include "neon/mmlaq.h" #include "neon/movl.h" #include "neon/movl_high.h" #include "neon/movn.h" @@ -146,10 +163,27 @@ #include "neon/pmin.h" #include "neon/qabs.h" #include "neon/qadd.h" +#include "neon/qdmlal.h" +#include 
"neon/qdmlal_high.h" +#include "neon/qdmlal_high_lane.h" +#include "neon/qdmlal_high_n.h" +#include "neon/qdmlal_lane.h" +#include "neon/qdmlal_n.h" +#include "neon/qdmlsl.h" +#include "neon/qdmlsl_high.h" +#include "neon/qdmlsl_high_lane.h" +#include "neon/qdmlsl_high_n.h" +#include "neon/qdmlsl_lane.h" +#include "neon/qdmlsl_n.h" #include "neon/qdmulh.h" #include "neon/qdmulh_lane.h" #include "neon/qdmulh_n.h" #include "neon/qdmull.h" +#include "neon/qdmull_high.h" +#include "neon/qdmull_high_lane.h" +#include "neon/qdmull_high_n.h" +#include "neon/qdmull_lane.h" +#include "neon/qdmull_n.h" #include "neon/qrdmulh.h" #include "neon/qrdmulh_lane.h" #include "neon/qrdmulh_n.h" diff --git a/simde/arm/neon/abal.h b/simde/arm/neon/abal.h new file mode 100644 index 000000000..7e5093d37 --- /dev/null +++ b/simde/arm/neon/abal.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_ABAL_H) +#define SIMDE_ARM_NEON_ABAL_H + +#include "abdl.h" +#include "add.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabal_s8(a, b, c); + #else + return simde_vaddq_s16(simde_vabdl_s8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabal_s8 + #define vabal_s8(a, b, c) simde_vabal_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabal_s16(a, b, c); + #else + return simde_vaddq_s32(simde_vabdl_s16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabal_s16 + #define vabal_s16(a, b, c) simde_vabal_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vabal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabal_s32(a, b, c); + #else + return simde_vaddq_s64(simde_vabdl_s32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabal_s32 + #define vabal_s32(a, b, c) simde_vabal_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vabal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabal_u8(a, b, c); + #else + return simde_vaddq_u16(simde_vabdl_u8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabal_u8 + #define vabal_u8(a, b, c) simde_vabal_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vabal_u16(simde_uint32x4_t a, 
simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabal_u16(a, b, c); + #else + return simde_vaddq_u32(simde_vabdl_u16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabal_u16 + #define vabal_u16(a, b, c) simde_vabal_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vabal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabal_u32(a, b, c); + #else + return simde_vaddq_u64(simde_vabdl_u32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabal_u32 + #define vabal_u32(a, b, c) simde_vabal_u32((a), (b), (c)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABAL_H) */ diff --git a/simde/arm/neon/abal_high.h b/simde/arm/neon/abal_high.h new file mode 100644 index 000000000..78f538dc4 --- /dev/null +++ b/simde/arm/neon/abal_high.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_ABAL_HIGH_H) +#define SIMDE_ARM_NEON_ABAL_HIGH_H + +#include "abdl.h" +#include "add.h" +#include "movl_high.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabal_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabal_high_s8(a, b, c); + #else + return simde_vaddq_s16(simde_vabdl_s8(simde_vget_high_s8(b), simde_vget_high_s8(c)), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabal_high_s8 + #define vabal_high_s8(a, b, c) simde_vabal_high_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabal_high_s16(a, b, c); + #else + return simde_vaddq_s32(simde_vabdl_s16(simde_vget_high_s16(b), simde_vget_high_s16(c)), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabal_high_s16 + #define vabal_high_s16(a, b, c) simde_vabal_high_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vabal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabal_high_s32(a, b, c); + #else + return simde_vaddq_s64(simde_vabdl_s32(simde_vget_high_s32(b), simde_vget_high_s32(c)), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabal_high_s32 + #define vabal_high_s32(a, b, c) simde_vabal_high_s32((a), (b), (c)) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vabal_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabal_high_u8(a, b, c); + #else + return simde_vaddq_u16(simde_vabdl_u8(simde_vget_high_u8(b), simde_vget_high_u8(c)), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabal_high_u8 + #define vabal_high_u8(a, b, c) simde_vabal_high_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vabal_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabal_high_u16(a, b, c); + #else + return simde_vaddq_u32(simde_vabdl_u16(simde_vget_high_u16(b), simde_vget_high_u16(c)), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabal_high_u16 + #define vabal_high_u16(a, b, c) simde_vabal_high_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vabal_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabal_high_u32(a, b, c); + #else + return simde_vaddq_u64(simde_vabdl_u32(simde_vget_high_u32(b), simde_vget_high_u32(c)), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabal_high_u32 + #define vabal_high_u32(a, b, c) simde_vabal_high_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABAL_HIGH_H) */ diff --git a/simde/arm/neon/cale.h b/simde/arm/neon/cale.h new file mode 100644 index 000000000..f2baa5158 --- /dev/null +++ b/simde/arm/neon/cale.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, 
+ * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_CALE_H) +#define SIMDE_ARM_NEON_CALE_H + +#include "cage.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcaleh_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcaleh_f16(a, b); + #else + return simde_vcageh_f16(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaleh_f16 + #define vcaleh_f16(a, b) simde_vcaleh_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcales_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcales_f32(a, b); + #else + return simde_vcages_f32(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcales_f32 + #define vcales_f32(a, b) simde_vcales_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcaled_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return 
vcaled_f64(a, b); + #else + return simde_vcaged_f64(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaled_f64 + #define vcaled_f64(a, b) simde_vcaled_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcale_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcale_f16(a, b); + #else + return simde_vcage_f16(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcale_f16 + #define vcale_f16(a, b) simde_vcale_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcale_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcale_f32(a, b); + #else + return simde_vcage_f32(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcale_f32 + #define vcale_f32(a, b) simde_vcale_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcale_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcale_f64(a, b); + #else + return simde_vcage_f64(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcale_f64 + #define vcale_f64(a, b) simde_vcale_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcaleq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcaleq_f16(a, b); + #else + return simde_vcageq_f16(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaleq_f16 + #define vcaleq_f16(a, b) simde_vcaleq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcaleq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcaleq_f32(a, b); + #else + return simde_vcageq_f32(b, a); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcaleq_f32 + #define vcaleq_f32(a, b) simde_vcaleq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcaleq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcaleq_f64(a, b); + #else + return simde_vcageq_f64(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaleq_f64 + #define vcaleq_f64(a, b) simde_vcaleq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CALE_H) */ diff --git a/simde/arm/neon/calt.h b/simde/arm/neon/calt.h new file mode 100644 index 000000000..99fa38419 --- /dev/null +++ b/simde/arm/neon/calt.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_CALT_H) +#define SIMDE_ARM_NEON_CALT_H + +#include "cagt.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcalth_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcalth_f16(a, b); + #else + return simde_vcagth_f16(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcalth_f16 + #define vcalth_f16(a, b) simde_vcalth_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcalts_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcalts_f32(a, b); + #else + return simde_vcagts_f32(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcalts_f32 + #define vcalts_f32(a, b) simde_vcalts_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcaltd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcaltd_f64(a, b); + #else + return simde_vcagtd_f64(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaltd_f64 + #define vcaltd_f64(a, b) simde_vcaltd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcalt_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcalt_f16(a, b); + #else + return simde_vcagt_f16(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcalt_f16 + #define vcalt_f16(a, b) simde_vcalt_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcalt_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcalt_f32(a, b); + #else + return simde_vcagt_f32(b, a); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcalt_f32 + #define vcalt_f32(a, b) simde_vcalt_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcalt_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcalt_f64(a, b); + #else + return simde_vcagt_f64(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcalt_f64 + #define vcalt_f64(a, b) simde_vcalt_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcaltq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcaltq_f16(a, b); + #else + return simde_vcagtq_f16(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaltq_f16 + #define vcaltq_f16(a, b) simde_vcaltq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcaltq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcaltq_f32(a, b); + #else + return simde_vcagtq_f32(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcaltq_f32 + #define vcaltq_f32(a, b) simde_vcaltq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcaltq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcaltq_f64(a, b); + #else + return simde_vcagtq_f64(b, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaltq_f64 + #define vcaltq_f64(a, b) simde_vcaltq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CALT_H) */ diff --git a/simde/arm/neon/create.h b/simde/arm/neon/create.h index 57f6f6eba..2e7a13ccd 100644 --- a/simde/arm/neon/create.h +++ b/simde/arm/neon/create.h @@ -23,11 +23,10 @@ * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore + * 2023 Yi-Yen Chung 
(Copyright owned by Andes Technology) */ -/* N.B. CM: vcreate_f16 and vcreate_bf16 are omitted as - * SIMDe has no 16-bit floating point support. - * Idem for the poly types. */ +/* Yi-Yen Chung: Added vcreate_f16 */ #if !defined(SIMDE_ARM_NEON_CREATE_H) #define SIMDE_ARM_NEON_CREATE_H @@ -152,6 +151,20 @@ simde_vcreate_u64(uint64_t a) { #define vcreate_u64(a) simde_vcreate_u64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcreate_f16(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcreate_f16(a); + #else + return simde_vreinterpret_f16_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_f16 + #define vcreate_f16(a) simde_vcreate_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcreate_f32(uint64_t a) { diff --git a/simde/arm/neon/cvt.h b/simde/arm/neon/cvt.h index 015be5f6c..9dec4a1ed 100644 --- a/simde/arm/neon/cvt.h +++ b/simde/arm/neon/cvt.h @@ -141,9 +141,9 @@ simde_vcvt_f64_f32(simde_float32x2_t a) { SIMDE_FUNCTION_ATTRIBUTES int16_t -simde_x_vcvts_s16_f16(simde_float16 a) { - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_ARM_NEON_FP16) - return HEDLEY_STATIC_CAST(int16_t, a); +simde_vcvth_s16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_s16_f16(a); #else simde_float32 af = simde_float16_to_float32(a); if (HEDLEY_UNLIKELY(af < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { @@ -157,12 +157,16 @@ simde_x_vcvts_s16_f16(simde_float16 a) { } #endif } +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_s16_f16 + #define vcvth_s16_f16(a) simde_vcvth_s16_f16(a) +#endif SIMDE_FUNCTION_ATTRIBUTES uint16_t -simde_x_vcvts_u16_f16(simde_float16 a) { - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint16_t, simde_float16_to_float32(a)); +simde_vcvth_u16_f16(simde_float16 a) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_u16_f16(a); #else simde_float32 af = simde_float16_to_float32(a); if (HEDLEY_UNLIKELY(af < SIMDE_FLOAT32_C(0.0))) { @@ -176,6 +180,10 @@ simde_x_vcvts_u16_f16(simde_float16 a) { } #endif } +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_u16_f16 + #define vcvth_u16_f16(a) simde_vcvth_u16_f16(a) +#endif SIMDE_FUNCTION_ATTRIBUTES int32_t @@ -266,7 +274,7 @@ simde_vcvtd_s64_f64(simde_float64 a) { return INT64_MIN; } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { return INT64_MAX; - } else if (simde_math_isnanf(a)) { + } else if (simde_math_isnan(a)) { return 0; } else { return HEDLEY_STATIC_CAST(int64_t, a); @@ -344,7 +352,7 @@ simde_vcvt_s16_f16(simde_float16x4_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_s16_f16(a_.values[i]); } #endif @@ -396,7 +404,7 @@ simde_vcvt_u16_f16(simde_float16x4_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_u16_f16(a_.values[i]); } #endif @@ -501,7 +509,7 @@ simde_vcvtq_s16_f16(simde_float16x8_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_s16_f16(a_.values[i]); } #endif @@ -605,7 +613,7 @@ simde_vcvtq_u16_f16(simde_float16x8_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_u16_f16(a_.values[i]); } #endif @@ -1182,8 +1190,6 @@ simde_vcvtas_s32_f32(simde_float32 a) { } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { return 0; } else { - // Round to 
Nearest with Ties to Away (a.k.a Rounding away from zero) rounding mode. - // For example, 23.2 gets rounded to 24, and −23.2 gets rounded to −24. return HEDLEY_STATIC_CAST(int32_t, simde_math_roundf(a)); } #endif @@ -1199,13 +1205,13 @@ simde_vcvtas_u32_f32(simde_float32 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtas_u32_f32(a); #else - if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { return UINT32_MAX; } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { return 0; } else { - // Round to Nearest with Ties to Away (a.k.a Rounding away from zero) rounding mode. - // For example, 23.2 gets rounded to 24, and −23.2 gets rounded to −24. if(a < 0) return 0; return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundf(a)); } diff --git a/simde/arm/neon/cvt_n.h b/simde/arm/neon/cvt_n.h new file mode 100644 index 000000000..99b5cb4b1 --- /dev/null +++ b/simde/arm/neon/cvt_n.h @@ -0,0 +1,579 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_CVT_N_H) +#define SIMDE_ARM_NEON_CVT_N_H + +#include "types.h" +#include "cvt.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcvt_n_s16_f16(simde_float16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvth_s16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); + } + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvt_n_s16_f16(a, n) vcvt_n_s16_f16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_s16_f16 + #define vcvt_n_s16_f16(a, n) simde_vcvt_n_s16_f16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_int32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + } + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvt_n_s32_f32(a, n) vcvt_n_s32_f32((a), (n)) +#endif +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_s32_f32 + #define vcvt_n_s32_f32(a, n) simde_vcvt_n_s32_f32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_int64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * pow(2, n)); + } + + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvt_n_s64_f64(a, n) vcvt_n_s64_f64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_s64_f64 + #define vcvt_n_s64_f64(a, n) simde_vcvt_n_s64_f64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); + } + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvt_n_u16_f16(a, n) vcvt_n_u16_f16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_u16_f16 + #define vcvt_n_u16_f16(a, n) simde_vcvt_n_u16_f16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + } + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvt_n_u32_f32(a, n) vcvt_n_u32_f32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_u32_f32 + #define vcvt_n_u32_f32(a, n) simde_vcvt_n_u32_f32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * pow(2, n)); + } + + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + #define simde_vcvt_n_u64_f64(a, n) vcvt_n_u64_f64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_u64_f64 + #define vcvt_n_u64_f64(a, n) simde_vcvt_n_u64_f64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcvtq_n_s16_f16(simde_float16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvth_s16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvtq_n_s16_f16(a, n) vcvtq_n_s16_f16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_s16_f16 + #define vcvtq_n_s16_f16(a, n) 
simde_vcvtq_n_s16_f16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_int32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvtq_n_s32_f32(a, n) vcvtq_n_s32_f32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_s32_f32 + #define vcvtq_n_s32_f32(a, n) simde_vcvtq_n_s32_f32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_int64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * pow(2, n)); + } + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtq_n_s64_f64(a, n) vcvtq_n_s64_f64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_s64_f64 + #define vcvtq_n_s64_f64(a, n) simde_vcvtq_n_s64_f64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); + } + + return 
simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) +#define simde_vcvtq_n_u16_f16(a, n) vcvtq_n_u16_f16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_u16_f16 + #define vcvtq_n_u16_f16(a, n) simde_vcvtq_n_u16_f16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + #define simde_vcvtq_n_u32_f32(a, n) vcvtq_n_u32_f32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_u32_f32 + #define vcvtq_n_u32_f32(a, n) simde_vcvtq_n_u32_f32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * pow(2, n)); + } + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + #define simde_vcvtq_n_u64_f64(a, n) vcvtq_n_u64_f64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_u64_f64 + #define vcvtq_n_u64_f64(a, n) simde_vcvtq_n_u64_f64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n) + 
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_float16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); + } + + return simde_float16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvt_n_f16_u16(a, n) vcvt_n_f16_u16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_f16_u16 + #define vcvt_n_f16_u16(a, n) simde_vcvt_n_f16_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_float16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); + } + + return simde_float16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvt_n_f16_s16(a, n) vcvt_n_f16_s16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_f16_s16 + #define vcvt_n_f16_s16(a, n) simde_vcvt_n_f16_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_float16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / 
pow(2, n))); + } + + return simde_float16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvtq_n_f16_u16(a, n) vcvtq_n_f16_u16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_f16_u16 + #define vcvtq_n_f16_u16(a, n) simde_vcvtq_n_f16_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_float16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, (a_.values[i] / pow(2, n)))); + } + + return simde_float16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvtq_n_f16_s16(a, n) vcvtq_n_f16_s16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_f16_s16 + #define vcvtq_n_f16_s16(a, n) simde_vcvtq_n_f16_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_float32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvt_n_f32_u32(a, n) vcvt_n_f32_u32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_f32_u32 + #define vcvt_n_f32_u32(a, n) simde_vcvt_n_f32_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t 
+simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_float32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvt_n_f32_s32(a, n) vcvt_n_f32_s32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_f32_s32 + #define vcvt_n_f32_s32(a, n) simde_vcvt_n_f32_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_float64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvt_n_f64_u64(a, n) vcvt_n_f64_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_f64_u64 + #define vcvt_n_f64_u64(a, n) simde_vcvt_n_f64_u64((a), (n)) +#endif + +/* Eric: Skip this function since it will trigger a compiler error when using i686-linux-gnu-g++-11. 
+SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_float64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtq_n_f64_u64(a, n) vcvtq_n_f64_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_f64_u64 + #define vcvtq_n_f64_u64(a, n) simde_vcvtq_n_f64_u64((a), (n)) +#endif +*/ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_float64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvt_n_f64_s64(a, n) vcvt_n_f64_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_n_f64_s64 + #define vcvt_n_f64_s64(a, n) simde_vcvt_n_f64_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_float64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return 
simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtq_n_f64_s64(a, n) vcvtq_n_f64_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_f64_s64 + #define vcvtq_n_f64_s64(a, n) simde_vcvtq_n_f64_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_float32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvtq_n_f32_s32(a, n) vcvtq_n_f32_s32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_f32_s32 + #define vcvtq_n_f32_s32(a, n) simde_vcvtq_n_f32_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_float32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + } + + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vcvtq_n_f32_u32(a, n) vcvtq_n_f32_u32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_n_f32_u32 + #define vcvtq_n_f32_u32(a, n) simde_vcvtq_n_f32_u32((a), (n)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_CVT_N_H */ diff --git a/simde/arm/neon/cvtn.h b/simde/arm/neon/cvtn.h index 
ad7186b8e..651dee274 100644 --- a/simde/arm/neon/cvtn.h +++ b/simde/arm/neon/cvtn.h @@ -22,12 +22,16 @@ * * Copyright: * 2023 Michael R. Crusoe + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_CVTN_H) #define SIMDE_ARM_NEON_CVTN_H #include "types.h" +#include "cvt.h" +#include "calt.h" +#include "cagt.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -99,6 +103,180 @@ simde_vcvtnq_s64_f64(simde_float64x2_t a) { #define vcvtnq_s64_f64(a) simde_vcvtnq_s64_f64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtnh_s64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnh_s64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(simde_float16_to_float32(a))); + #else + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { + return INT64_MAX; + } else if (simde_math_isnanf(a_)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(a_)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnh_s64_f16 + #define vcvtnh_s64_f16(a) simde_vcvtnh_s64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtnh_s32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnh_s32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(simde_float16_to_float32(a))); + #else + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (simde_math_isnanf(a_)) { + 
return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a_)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnh_s32_f16 + #define vcvtnh_s32_f16(a) simde_vcvtnh_s32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vcvtnh_s16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnh_s16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(simde_float16_to_float32(a))); + #else + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { + return INT16_MIN; + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { + return INT16_MAX; + } else if (simde_math_isnanf(a_)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(a_)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnh_s16_f16 + #define vcvtnh_s16_f16(a) simde_vcvtnh_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtnh_u64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnh_u64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(simde_float16_to_float32(a))); + #else + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { + return UINT64_MAX; + } else if (simde_math_isnanf(a_)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(a_)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnh_u64_f16 + #define vcvtnh_u64_f16(a) simde_vcvtnh_u64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t 
+simde_vcvtnh_u32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnh_u32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(simde_float16_to_float32(a))); + #else + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (simde_math_isnanf(a_)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a_)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnh_u32_f16 + #define vcvtnh_u32_f16(a) simde_vcvtnh_u32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcvtnh_u16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnh_u16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(simde_float16_to_float32(a))); + #else + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { + return UINT16_MAX; + } else if (simde_math_isnanf(a_)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(a_)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnh_u16_f16 + #define vcvtnh_u16_f16(a) simde_vcvtnh_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtns_s32_f32(simde_float32 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtns_s32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a)); + #else + if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + 
return INT32_MIN; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtns_s32_f32 + #define vcvtns_s32_f32(a) simde_vcvtns_s32_f32(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vcvtns_u32_f32(simde_float32 a) { @@ -111,7 +289,7 @@ simde_vcvtns_u32_f32(simde_float32 a) { return 0; } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { return UINT32_MAX; - } else if (simde_math_isnanf(a)) { + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { return 0; } else { return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); @@ -157,6 +335,30 @@ simde_vcvtnq_u32_f32(simde_float32x4_t a) { #define vcvtnq_u32_f32(a) simde_vcvtnq_u32_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtnd_s64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtnd_s64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a)); + #else + if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { + return INT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnd_s64_f64 + #define vcvtnd_s64_f64(a) simde_vcvtnd_s64_f64(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vcvtnd_u64_f64(simde_float64 a) { @@ -215,6 +417,182 @@ simde_vcvtnq_u64_f64(simde_float64x2_t a) { #define vcvtnq_u64_f64(a) simde_vcvtnq_u64_f64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcvtnq_s16_f16(simde_float16x8_t a) { + #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnq_s16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnh_s16_f16(a_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnq_s16_f16 + #define vcvtnq_s16_f16(a) simde_vcvtnq_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcvtn_s16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtn_s16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnh_s16_f16(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtn_s16_f16 + #define vcvtn_s16_f16(a) simde_vcvtn_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcvtnq_u16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtnq_u16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnh_u16_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnq_u16_f16 + #define vcvtnq_u16_f16(a) simde_vcvtnq_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcvtn_u16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + 
return vcvtn_u16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnh_u16_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtn_u16_f16 + #define vcvtn_u16_f16(a) simde_vcvtn_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcvtn_u32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtn_u32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtns_u32_f32(a_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtn_u32_f32 + #define vcvtn_u32_f32(a) simde_vcvtn_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcvtn_s32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtn_s32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_int32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtns_s32_f32(a_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtn_s32_f32 + #define vcvtn_s32_f32(a) simde_vcvtn_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcvtn_s64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtn_s64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_int64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnd_s64_f64(a_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtn_s64_f64 + #define vcvtn_s64_f64(a) simde_vcvtn_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcvtn_u64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtn_u64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnd_u64_f64(a_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtn_u64_f64 + #define vcvtn_u64_f64(a) simde_vcvtn_u64_f64(a) +#endif + SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff --git a/simde/arm/neon/dup_lane.h b/simde/arm/neon/dup_lane.h index bc1720518..61a88503c 100644 --- a/simde/arm/neon/dup_lane.h +++ b/simde/arm/neon/dup_lane.h @@ -22,6 +22,7 @@ * * Copyright: * 2020-2021 Evan Nemerson + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_DUP_LANE_H) @@ -146,6 +147,46 @@ simde_vdupd_lane_u64(simde_uint64x1_t vec, const int lane) #define vdupd_lane_u64(vec, lane) simde_vdupd_lane_u64((vec), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vduph_lane_f16(simde_float16x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_float16x4_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vduph_lane_f16(vec, lane) vduph_lane_f16(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vduph_lane_f16 + #define vduph_lane_f16(vec, lane) simde_vduph_lane_f16((vec), (lane)) +#endif + +// simde_vdup_lane_f16 +#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vdup_lane_f16(vec, lane) vdup_lane_f16(vec, lane) +#else + #define simde_vdup_lane_f16(vec, lane) simde_vdup_n_f16(simde_vduph_lane_f16(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_f16 + #define vdup_lane_f16(vec, lane) simde_vdup_lane_f16((vec), (lane)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vdupq_lane_f16(simde_float16x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdupq_n_f16(simde_float16x4_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) +#define simde_vdupq_lane_f16(vec, lane) vdupq_lane_f16(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_f16 + #define vdupq_lane_f16(vec, lane) simde_vdupq_lane_f16((vec), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vdupd_lane_f64(simde_float64x1_t vec, const int lane) diff --git a/simde/arm/neon/ext.h b/simde/arm/neon/ext.h index 446919355..3b1c3c9ab 100644 --- a/simde/arm/neon/ext.h +++ b/simde/arm/neon/ext.h @@ -34,6 +34,32 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vext_f16(simde_float16x4_t a, simde_float16x4_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + simde_float16x4_t r; + SIMDE_CONSTIFY_4_(vext_f16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_float16x4_private + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; + } + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_f16 + #define vext_f16(a, b, n) simde_vext_f16((a), (b), (n)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n) diff --git a/simde/arm/neon/fma.h b/simde/arm/neon/fma.h index 7c1a00118..aaf9e04e0 100644 --- a/simde/arm/neon/fma.h +++ b/simde/arm/neon/fma.h @@ -35,6 +35,20 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vfmah_f16(simde_float16_t a, simde_float16_t b, simde_float16_t c) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + return vfmah_f16(a, b, c); + #else + return simde_vaddh_f16(a, simde_vmulh_f16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vfmah_f16 + #define vfmah_f16(a, b, c) simde_vfmah_f16(a, b, c) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { @@ -63,6 +77,20 @@ simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { #define vfma_f64(a, b, c) simde_vfma_f64(a, b, c) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vfma_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + return vfma_f16(a, b, c); + #else + return simde_vadd_f16(a, simde_vmul_f16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vfma_f16 + #define vfma_f16(a, b, c) simde_vfma_f16(a, b, c) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vfmaq_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16x8_t c) { diff --git a/simde/arm/neon/fma_lane.h 
b/simde/arm/neon/fma_lane.h index bf4edcbb9..e937f715c 100644 --- a/simde/arm/neon/fma_lane.h +++ b/simde/arm/neon/fma_lane.h @@ -22,6 +22,7 @@ * * Copyright: * 2021 Atharva Nimbalkar +* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_FMA_LANE_H) @@ -83,6 +84,52 @@ SIMDE_BEGIN_DECLS_ #define vfmad_laneq_f64(a, b, v, lane) simde_vfmad_laneq_f64(a, b, v, lane) #endif +/* simde_vfmah_lane_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmah_lane_f16(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmah_lane_f16(a, b, v, lane)) + #else + #define simde_vfmah_lane_f16(a, b, v, lane) vfmah_lane_f16((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmah_lane_f16(a, b, v, lane) \ + simde_vget_lane_f16( \ + simde_vadd_f16( \ + simde_vdup_n_f16(a), \ + simde_vdup_n_f16(simde_vmulh_lane_f16(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmah_lane_f16 + #define vfmah_lane_f16(a, b, v, lane) simde_vfmah_lane_f16(a, b, v, lane) +#endif + +/* simde_vfmah_laneq_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmah_laneq_f16(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmah_laneq_f16(a, b, v, lane)) + #else + #define simde_vfmah_laneq_f16(a, b, v, lane) vfmah_laneq_f16((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmah_laneq_f16(a, b, v, lane) \ + simde_vget_lane_f16( \ + simde_vadd_f16( \ + simde_vdup_n_f16(a), \ + simde_vdup_n_f16(simde_vmulh_laneq_f16(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmah_laneq_f16 + 
#define vfmah_laneq_f16(a, b, v, lane) simde_vfmah_laneq_f16(a, b, v, lane) +#endif + /* simde_vfmas_lane_f32 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) @@ -129,6 +176,17 @@ SIMDE_BEGIN_DECLS_ #define vfmas_laneq_f32(a, b, v, lane) simde_vfmas_laneq_f32(a, b, v, lane) #endif +/* simde_vfma_lane_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfma_lane_f16(a, b, v, lane) vfma_lane_f16(a, b, v, lane) +#else + #define simde_vfma_lane_f16(a, b, v, lane) simde_vadd_f16(a, simde_vmul_lane_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_lane_f16 + #define vfma_lane_f16(a, b, v, lane) simde_vfma_lane_f16(a, b, v, lane) +#endif + /* simde_vfma_lane_f32 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) #define simde_vfma_lane_f32(a, b, v, lane) vfma_lane_f32(a, b, v, lane) @@ -151,6 +209,17 @@ SIMDE_BEGIN_DECLS_ #define vfma_lane_f64(a, b, v, lane) simde_vfma_lane_f64(a, b, v, lane) #endif +/* simde_vfma_laneq_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfma_laneq_f16(a, b, v, lane) vfma_laneq_f16((a), (b), (v), (lane)) +#else + #define simde_vfma_laneq_f16(a, b, v, lane) simde_vadd_f16(a, simde_vmul_laneq_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_laneq_f16 + #define vfma_laneq_f16(a, b, v, lane) simde_vfma_laneq_f16(a, b, v, lane) +#endif + /* simde_vfma_laneq_f32 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) #define simde_vfma_laneq_f32(a, b, v, lane) vfma_laneq_f32((a), (b), (v), (lane)) @@ -184,6 +253,17 @@ SIMDE_BEGIN_DECLS_ #define vfmaq_lane_f64(a, b, v, lane) simde_vfmaq_lane_f64(a, b, v, lane) #endif +/* simde_vfmaq_lane_f16 */ +#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfmaq_lane_f16(a, b, v, lane) vfmaq_lane_f16((a), (b), (v), (lane)) +#else + #define simde_vfmaq_lane_f16(a, b, v, lane) simde_vaddq_f16(a, simde_vmulq_lane_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_lane_f16 + #define vfmaq_lane_f16(a, b, v, lane) simde_vfmaq_lane_f16(a, b, v, lane) +#endif + /* simde_vfmaq_lane_f32 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) #define simde_vfmaq_lane_f32(a, b, v, lane) vfmaq_lane_f32((a), (b), (v), (lane)) @@ -195,6 +275,18 @@ SIMDE_BEGIN_DECLS_ #define vfmaq_lane_f32(a, b, v, lane) simde_vfmaq_lane_f32(a, b, v, lane) #endif +/* simde_vfmaq_laneq_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfmaq_laneq_f16(a, b, v, lane) vfmaq_laneq_f16((a), (b), (v), (lane)) +#else + #define simde_vfmaq_laneq_f16(a, b, v, lane) \ + simde_vaddq_f16(a, simde_vmulq_laneq_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_laneq_f16 + #define vfmaq_laneq_f16(a, b, v, lane) simde_vfmaq_laneq_f16(a, b, v, lane) +#endif + /* simde_vfmaq_laneq_f32 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) #define simde_vfmaq_laneq_f32(a, b, v, lane) vfmaq_laneq_f32((a), (b), (v), (lane)) diff --git a/simde/arm/neon/fma_n.h b/simde/arm/neon/fma_n.h index d94f01ac3..0a23407c6 100644 --- a/simde/arm/neon/fma_n.h +++ b/simde/arm/neon/fma_n.h @@ -22,6 +22,7 @@ * * Copyright: * 2021 Evan Nemerson +* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_FMA_N_H) @@ -35,6 +36,34 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vfma_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) + return vfma_n_f16(a, b, c); + #else + return simde_vfma_f16(a, b, simde_vdup_n_f16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_n_f16 + #define vfma_n_f16(a, b, c) simde_vfma_n_f16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vfmaq_n_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) + return vfmaq_n_f16(a, b, c); + #else + return simde_vfmaq_f16(a, b, simde_vdupq_n_f16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_n_f16 + #define vfmaq_n_f16(a, b, c) simde_vfmaq_n_f16(a, b, c) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) { diff --git a/simde/arm/neon/fms.h b/simde/arm/neon/fms.h new file mode 100644 index 000000000..0ad265c3d --- /dev/null +++ b/simde/arm/neon/fms.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) +*/ + +#if !defined(SIMDE_ARM_NEON_FMS_H) +#define SIMDE_ARM_NEON_FMS_H + +#include "add.h" +#include "mul.h" +#include "neg.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vfmsh_f16(simde_float16_t a, simde_float16_t b, simde_float16_t c) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + return vfmsh_f16(a, b, c); + #else + return simde_vaddh_f16(a, simde_vnegh_f16(simde_vmulh_f16(b, c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vfmsh_f16 + #define vfmsh_f16(a, b, c) simde_vfmsh_f16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vfms_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + return vfms_f32(a, b, c); + #else + return simde_vadd_f32(a, simde_vneg_f32(simde_vmul_f32(b, c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfms_f32 + #define vfms_f32(a, b, c) simde_vfms_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vfms_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + return vfms_f64(a, b, c); + #else + return simde_vadd_f64(a, 
simde_vneg_f64(simde_vmul_f64(b, c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) /* vfms_f64 is AArch64-only, same gate as the native call above */ + #undef vfms_f64 + #define vfms_f64(a, b, c) simde_vfms_f64(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vfms_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + return vfms_f16(a, b, c); + #else + return simde_vadd_f16(a, simde_vneg_f16(simde_vmul_f16(b, c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vfms_f16 + #define vfms_f16(a, b, c) simde_vfms_f16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vfmsq_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + return vfmsq_f16(a, b, c); + #else + return simde_vaddq_f16(a, simde_vnegq_f16(simde_vmulq_f16(b, c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_f16 + #define vfmsq_f16(a, b, c) simde_vfmsq_f16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vfmsq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + return vfmsq_f32(a, b, c); + #else + return simde_vaddq_f32(a, simde_vnegq_f32(simde_vmulq_f32(b, c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfmsq_f32 + #define vfmsq_f32(a, b, c) simde_vfmsq_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vfmsq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + return vfmsq_f64(a, b, c); + #else + return simde_vaddq_f64(a, simde_vnegq_f64(simde_vmulq_f64(b, c))); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_f64 + #define vfmsq_f64(a, b, c) simde_vfmsq_f64(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_FMS_H) */ diff --git a/simde/arm/neon/fms_lane.h b/simde/arm/neon/fms_lane.h new file mode 100644 index 000000000..05ef96ae3 --- /dev/null +++ b/simde/arm/neon/fms_lane.h @@ -0,0 +1,316 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) +*/ + +#if !defined(SIMDE_ARM_NEON_FMS_LANE_H) +#define SIMDE_ARM_NEON_FMS_LANE_H + +#include "sub.h" +#include "dup_n.h" +#include "get_lane.h" +#include "mul.h" +#include "mul_lane.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* simde_vfmsd_lane_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmsd_lane_f64(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmsd_lane_f64(a, b, v, lane)) + #else + #define simde_vfmsd_lane_f64(a, b, v, lane) vfmsd_lane_f64((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmsd_lane_f64(a, b, v, lane) \ + simde_vget_lane_f64( \ + simde_vsub_f64( \ + simde_vdup_n_f64(a), \ + simde_vdup_n_f64(simde_vmuld_lane_f64(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsd_lane_f64 + #define vfmsd_lane_f64(a, b, v, lane) simde_vfmsd_lane_f64(a, b, v, lane) +#endif + +/* simde_vfmsd_laneq_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmsd_laneq_f64(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmsd_laneq_f64(a, b, v, lane)) + #else + #define simde_vfmsd_laneq_f64(a, b, v, lane) vfmsd_laneq_f64((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmsd_laneq_f64(a, b, v, lane) \ + simde_vget_lane_f64( \ + simde_vsub_f64( \ + simde_vdup_n_f64(a), \ + simde_vdup_n_f64(simde_vmuld_laneq_f64(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsd_laneq_f64 + #define vfmsd_laneq_f64(a, b, v, lane) simde_vfmsd_laneq_f64(a, b, v, lane) +#endif + +/* simde_vfmsh_lane_f16 */ +#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmsh_lane_f16(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmsh_lane_f16(a, b, v, lane)) + #else + #define simde_vfmsh_lane_f16(a, b, v, lane) vfmsh_lane_f16((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmsh_lane_f16(a, b, v, lane) \ + simde_vget_lane_f16( \ + simde_vsub_f16( \ + simde_vdup_n_f16(a), \ + simde_vdup_n_f16(simde_vmulh_lane_f16(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsh_lane_f16 + #define vfmsh_lane_f16(a, b, v, lane) simde_vfmsh_lane_f16(a, b, v, lane) +#endif + +/* simde_vfmsh_laneq_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmsh_laneq_f16(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmsh_laneq_f16(a, b, v, lane)) + #else + #define simde_vfmsh_laneq_f16(a, b, v, lane) vfmsh_laneq_f16((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmsh_laneq_f16(a, b, v, lane) \ + simde_vget_lane_f16( \ + simde_vsub_f16( \ + simde_vdup_n_f16(a), \ + simde_vdup_n_f16(simde_vmulh_laneq_f16(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsh_laneq_f16 + #define vfmsh_laneq_f16(a, b, v, lane) simde_vfmsh_laneq_f16(a, b, v, lane) +#endif + +/* simde_vfmss_lane_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmss_lane_f32(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmss_lane_f32(a, b, v, lane)) + #else + #define 
simde_vfmss_lane_f32(a, b, v, lane) vfmss_lane_f32((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmss_lane_f32(a, b, v, lane) \ + simde_vget_lane_f32( \ + simde_vsub_f32( \ + simde_vdup_n_f32(a), \ + simde_vdup_n_f32(simde_vmuls_lane_f32(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmss_lane_f32 + #define vfmss_lane_f32(a, b, v, lane) simde_vfmss_lane_f32(a, b, v, lane) +#endif + +/* simde_vfmss_laneq_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmss_laneq_f32(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmss_laneq_f32(a, b, v, lane)) + #else + #define simde_vfmss_laneq_f32(a, b, v, lane) vfmss_laneq_f32((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmss_laneq_f32(a, b, v, lane) \ + simde_vget_lane_f32( \ + simde_vsub_f32( \ + simde_vdup_n_f32(a), \ + simde_vdup_n_f32(simde_vmuls_laneq_f32(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmss_laneq_f32 + #define vfmss_laneq_f32(a, b, v, lane) simde_vfmss_laneq_f32(a, b, v, lane) +#endif + +/* simde_vfms_lane_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfms_lane_f16(a, b, v, lane) vfms_lane_f16(a, b, v, lane) +#else + #define simde_vfms_lane_f16(a, b, v, lane) simde_vsub_f16(a, simde_vmul_lane_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfms_lane_f16 + #define vfms_lane_f16(a, b, v, lane) simde_vfms_lane_f16(a, b, v, lane) +#endif + +/* simde_vfms_lane_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfms_lane_f32(a, b, v, lane) vfms_lane_f32(a, b, v, lane) +#else + #define simde_vfms_lane_f32(a, b, v, lane) simde_vsub_f32(a, 
simde_vmul_lane_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfms_lane_f32 + #define vfms_lane_f32(a, b, v, lane) simde_vfms_lane_f32(a, b, v, lane) +#endif + +/* simde_vfms_lane_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfms_lane_f64(a, b, v, lane) vfms_lane_f64((a), (b), (v), (lane)) +#else + #define simde_vfms_lane_f64(a, b, v, lane) simde_vsub_f64(a, simde_vmul_lane_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfms_lane_f64 + #define vfms_lane_f64(a, b, v, lane) simde_vfms_lane_f64(a, b, v, lane) +#endif + +/* simde_vfms_laneq_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfms_laneq_f16(a, b, v, lane) vfms_laneq_f16((a), (b), (v), (lane)) +#else + #define simde_vfms_laneq_f16(a, b, v, lane) simde_vsub_f16(a, simde_vmul_laneq_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfms_laneq_f16 + #define vfms_laneq_f16(a, b, v, lane) simde_vfms_laneq_f16(a, b, v, lane) +#endif + +/* simde_vfms_laneq_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfms_laneq_f32(a, b, v, lane) vfms_laneq_f32((a), (b), (v), (lane)) +#else + #define simde_vfms_laneq_f32(a, b, v, lane) simde_vsub_f32(a, simde_vmul_laneq_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfms_laneq_f32 + #define vfms_laneq_f32(a, b, v, lane) simde_vfms_laneq_f32(a, b, v, lane) +#endif + +/* simde_vfms_laneq_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfms_laneq_f64(a, b, v, lane) vfms_laneq_f64((a), (b), (v), (lane)) +#else + #define simde_vfms_laneq_f64(a, b, v, lane) simde_vsub_f64(a, simde_vmul_laneq_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef 
vfms_laneq_f64 + #define vfms_laneq_f64(a, b, v, lane) simde_vfms_laneq_f64(a, b, v, lane) +#endif + +/* simde_vfmsq_lane_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfmsq_lane_f64(a, b, v, lane) vfmsq_lane_f64((a), (b), (v), (lane)) +#else + #define simde_vfmsq_lane_f64(a, b, v, lane) simde_vsubq_f64(a, simde_vmulq_lane_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_lane_f64 + #define vfmsq_lane_f64(a, b, v, lane) simde_vfmsq_lane_f64(a, b, v, lane) +#endif + +/* simde_vfmsq_lane_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfmsq_lane_f16(a, b, v, lane) vfmsq_lane_f16((a), (b), (v), (lane)) +#else + #define simde_vfmsq_lane_f16(a, b, v, lane) simde_vsubq_f16(a, simde_vmulq_lane_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_lane_f16 + #define vfmsq_lane_f16(a, b, v, lane) simde_vfmsq_lane_f16(a, b, v, lane) +#endif + +/* simde_vfmsq_lane_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfmsq_lane_f32(a, b, v, lane) vfmsq_lane_f32((a), (b), (v), (lane)) +#else + #define simde_vfmsq_lane_f32(a, b, v, lane) simde_vsubq_f32(a, simde_vmulq_lane_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_lane_f32 + #define vfmsq_lane_f32(a, b, v, lane) simde_vfmsq_lane_f32(a, b, v, lane) +#endif + +/* simde_vfmsq_laneq_f16 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vfmsq_laneq_f16(a, b, v, lane) vfmsq_laneq_f16((a), (b), (v), (lane)) +#else + #define simde_vfmsq_laneq_f16(a, b, v, lane) \ + simde_vsubq_f16(a, simde_vmulq_laneq_f16(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_laneq_f16 + #define vfmsq_laneq_f16(a, b, v, lane) 
simde_vfmsq_laneq_f16(a, b, v, lane) +#endif + +/* simde_vfmsq_laneq_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfmsq_laneq_f32(a, b, v, lane) vfmsq_laneq_f32((a), (b), (v), (lane)) +#else + #define simde_vfmsq_laneq_f32(a, b, v, lane) \ + simde_vsubq_f32(a, simde_vmulq_laneq_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_laneq_f32 + #define vfmsq_laneq_f32(a, b, v, lane) simde_vfmsq_laneq_f32(a, b, v, lane) +#endif + +/* simde_vfmsq_laneq_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) + #define simde_vfmsq_laneq_f64(a, b, v, lane) vfmsq_laneq_f64((a), (b), (v), (lane)) +#else + #define simde_vfmsq_laneq_f64(a, b, v, lane) \ + simde_vsubq_f64(a, simde_vmulq_laneq_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_laneq_f64 + #define vfmsq_laneq_f64(a, b, v, lane) simde_vfmsq_laneq_f64(a, b, v, lane) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_FMS_LANE_H) */ diff --git a/simde/arm/neon/fms_n.h b/simde/arm/neon/fms_n.h new file mode 100644 index 000000000..eb95818f8 --- /dev/null +++ b/simde/arm/neon/fms_n.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2023 Yi-Yen Chung (Copyright owned by Andes Technology) +*/ + +#if !defined(SIMDE_ARM_NEON_FMS_N_H) +#define SIMDE_ARM_NEON_FMS_N_H + +#include "types.h" +#include "dup_n.h" +#include "fms.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vfms_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) + return vfms_n_f16(a, b, c); + #else + return simde_vfms_f16(a, b, simde_vdup_n_f16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfms_n_f16 + #define vfms_n_f16(a, b, c) simde_vfms_n_f16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vfmsq_n_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) + return vfmsq_n_f16(a, b, c); + #else + return simde_vfmsq_f16(a, b, simde_vdupq_n_f16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmsq_n_f16 + #define vfmsq_n_f16(a, b, c) simde_vfmsq_n_f16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t 
+simde_vfms_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + return vfms_n_f32(a, b, c); + #else /* portable path: broadcast c with simde_vdup_n_f32, then reuse simde_vfms_f32 */ + return simde_vfms_f32(a, b, simde_vdup_n_f32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfms_n_f32 + #define vfms_n_f32(a, b, c) simde_vfms_n_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vfms_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vfms_n_f64(a, b, c); + #else /* portable path: broadcast c with simde_vdup_n_f64, then reuse simde_vfms_f64 */ + return simde_vfms_f64(a, b, simde_vdup_n_f64(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) /* alias switch matches the A64V8 native guard used above */ + #undef vfms_n_f64 + #define vfms_n_f64(a, b, c) simde_vfms_n_f64(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vfmsq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + return vfmsq_n_f32(a, b, c); + #else /* portable path: broadcast c with simde_vdupq_n_f32, then reuse simde_vfmsq_f32 */ + return simde_vfmsq_f32(a, b, simde_vdupq_n_f32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfmsq_n_f32 + #define vfmsq_n_f32(a, b, c) simde_vfmsq_n_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vfmsq_n_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vfmsq_n_f64(a, b, c); + #else /* portable path: broadcast c with simde_vdupq_n_f64, then reuse simde_vfmsq_f64 */ + return simde_vfmsq_f64(a, b, simde_vdupq_n_f64(c)); + #endif +} +#if
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) /* alias switch matches the A64V8 native guard used above */ + #undef vfmsq_n_f64 + #define vfmsq_n_f64(a, b, c) simde_vfmsq_n_f64(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_FMS_N_H) */ diff --git a/simde/arm/neon/get_lane.h b/simde/arm/neon/get_lane.h index a5f1bab49..c992ecf95 100644 --- a/simde/arm/neon/get_lane.h +++ b/simde/arm/neon/get_lane.h @@ -276,7 +276,7 @@ simde_vgetq_lane_f16(simde_float16x8_t v, const int lane) simde_float16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - SIMDE_CONSTIFY_8_(vget_lane_f16, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT16_VALUE(0.0)), lane, v); + SIMDE_CONSTIFY_8_(vgetq_lane_f16, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT16_VALUE(0.0)), lane, v); #else simde_float16x8_private v_ = simde_float16x8_to_private(v); diff --git a/simde/arm/neon/ld1_dup.h b/simde/arm/neon/ld1_dup.h index ce6da6d2e..8e8655cdf 100644 --- a/simde/arm/neon/ld1_dup.h +++ b/simde/arm/neon/ld1_dup.h @@ -36,6 +36,20 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vld1_dup_f16(simde_float16 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld1_dup_f16(ptr); + #else /* portable path: read *ptr once and splat it with simde_vdup_n_f16 */ + return simde_vdup_n_f16(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_f16 + #define vld1_dup_f16(a) simde_vld1_dup_f16((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vld1_dup_f32(simde_float32 const * ptr) { diff --git a/simde/arm/neon/ld1_lane.h b/simde/arm/neon/ld1_lane.h index 4e36caf52..8332703aa 100644 --- a/simde/arm/neon/ld1_lane.h +++ b/simde/arm/neon/ld1_lane.h @@ -22,6 +22,7 @@ * * Copyright: * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1_LANE_H) @@ -161,6 +162,22 @@ simde_uint64x1_t simde_vld1_lane_u64(uint64_t const *ptr,
simde_uint64x1_t src, #define vld1_lane_u64(ptr, src, lane) simde_vld1_lane_u64((ptr), (src), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t simde_vld1_lane_f16(simde_float16_t const *ptr, simde_float16x4_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x4_private r = simde_float16x4_to_private(src); + r.values[lane] = *ptr; + return simde_float16x4_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vld1_lane_f16(ptr, src, lane) vld1_lane_f16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_f16 + #define vld1_lane_f16(ptr, src, lane) simde_vld1_lane_f16((ptr), (src), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vld1_lane_f32(simde_float32_t const *ptr, simde_float32x2_t src, const int lane) @@ -321,6 +338,22 @@ simde_uint64x2_t simde_vld1q_lane_u64(uint64_t const *ptr, simde_uint64x2_t src, #define vld1q_lane_u64(ptr, src, lane) simde_vld1q_lane_u64((ptr), (src), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t simde_vld1q_lane_f16(simde_float16_t const *ptr, simde_float16x8_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_float16x8_private r = simde_float16x8_to_private(src); + r.values[lane] = *ptr; + return simde_float16x8_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vld1q_lane_f16(ptr, src, lane) vld1q_lane_f16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_f16 + #define vld1q_lane_f16(ptr, src, lane) simde_vld1q_lane_f16((ptr), (src), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vld1q_lane_f32(simde_float32_t const *ptr, simde_float32x4_t src, const int lane) diff --git a/simde/arm/neon/ld1_x2.h b/simde/arm/neon/ld1_x2.h index faf52a700..10c858e74 100644 --- a/simde/arm/neon/ld1_x2.h +++ 
b/simde/arm/neon/ld1_x2.h @@ -24,6 +24,7 @@ * 2020 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) * 2021 Décio Luiz Gazzoni Filho + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1_X2_H) @@ -40,6 +41,29 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x2_t +simde_vld1_f16_x2(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) + return vld1_f16_x2(ptr); + #else + simde_float16x4_private a_[2]; + for (size_t i = 0; i < 8; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_float16x4x2_t s_ = { { simde_float16x4_from_private(a_[0]), + simde_float16x4_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f16_x2 + #define vld1_f16_x2(a) simde_vld1_f16_x2((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vld1_f32_x2(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) { diff --git a/simde/arm/neon/ld1_x3.h b/simde/arm/neon/ld1_x3.h index ee5e5df73..52d864d6e 100644 --- a/simde/arm/neon/ld1_x3.h +++ b/simde/arm/neon/ld1_x3.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1_X3_H) @@ -39,6 +40,30 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x3_t +simde_vld1_f16_x3(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(12)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && 
defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) + return vld1_f16_x3(ptr); + #else + simde_float16x4_private a_[3]; + for (size_t i = 0; i < 12; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_float16x4x3_t s_ = { { simde_float16x4_from_private(a_[0]), + simde_float16x4_from_private(a_[1]), + simde_float16x4_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f16_x3 + #define vld1_f16_x3(a) simde_vld1_f16_x3((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x3_t simde_vld1_f32_x3(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(6)]) { diff --git a/simde/arm/neon/ld1_x4.h b/simde/arm/neon/ld1_x4.h index 7f81f2997..3b7edb6bb 100644 --- a/simde/arm/neon/ld1_x4.h +++ b/simde/arm/neon/ld1_x4.h @@ -24,6 +24,7 @@ * 2020 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) * 2021 Décio Luiz Gazzoni Filho + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1_X4_H) @@ -40,6 +41,31 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x4_t +simde_vld1_f16_x4(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) + return vld1_f16_x4(ptr); + #else + simde_float16x4_private a_[4]; + for (size_t i = 0; i < 16; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_float16x4x4_t s_ = { { simde_float16x4_from_private(a_[0]), + simde_float16x4_from_private(a_[1]), + simde_float16x4_from_private(a_[2]), + simde_float16x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f16_x4 + #define vld1_f16_x4(a) simde_vld1_f16_x4((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x4_t simde_vld1_f32_x4(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { diff --git a/simde/arm/neon/ld1q_x2.h b/simde/arm/neon/ld1q_x2.h index d71a0c66d..9f16aec54 100644 --- a/simde/arm/neon/ld1q_x2.h +++ b/simde/arm/neon/ld1q_x2.h @@ -24,6 +24,7 @@ * 2020 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) * 2021 Décio Luiz Gazzoni Filho + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1Q_X2_H) @@ -40,6 +41,30 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x2_t +simde_vld1q_f16_x2(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + defined(SIMDE_ARM_NEON_FP16) + return vld1q_f16_x2(ptr); + #else + simde_float16x8_private a_[2]; + for (size_t i = 0; i < 16; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_float16x8x2_t s_ = { { simde_float16x8_from_private(a_[0]), + simde_float16x8_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f16_x2 + #define vld1q_f16_x2(a) simde_vld1q_f16_x2((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vld1q_f32_x2(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { diff --git a/simde/arm/neon/ld1q_x3.h b/simde/arm/neon/ld1q_x3.h index 2005d37d0..01242d5ff 100644 --- a/simde/arm/neon/ld1q_x3.h +++ b/simde/arm/neon/ld1q_x3.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright 
owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1Q_X3_H) @@ -39,6 +40,30 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x3_t +simde_vld1q_f16_x3(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(24)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) + return vld1q_f16_x3(ptr); + #else + simde_float16x8_private a_[3]; + for (size_t i = 0; i < 24; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_float16x8x3_t s_ = { { simde_float16x8_from_private(a_[0]), + simde_float16x8_from_private(a_[1]), + simde_float16x8_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f16_x3 + #define vld1q_f16_x3(a) simde_vld1q_f16_x3((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x3_t simde_vld1q_f32_x3(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(12)]) { diff --git a/simde/arm/neon/ld1q_x4.h b/simde/arm/neon/ld1q_x4.h index 7bc1d6192..cd97d1479 100644 --- a/simde/arm/neon/ld1q_x4.h +++ b/simde/arm/neon/ld1q_x4.h @@ -24,6 +24,7 @@ * 2020 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) * 2021 Décio Luiz Gazzoni Filho + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD1Q_X4_H) @@ -40,6 +41,31 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x4_t +simde_vld1q_f16_x4(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && 
defined(SIMDE_ARM_NEON_A64V8_NATIVE))) + return vld1q_f16_x4(ptr); + #else + simde_float16x8_private a_[4]; + for (size_t i = 0; i < 32; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_float16x8x4_t s_ = { { simde_float16x8_from_private(a_[0]), + simde_float16x8_from_private(a_[1]), + simde_float16x8_from_private(a_[2]), + simde_float16x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f16_x4 + #define vld1q_f16_x4(a) simde_vld1q_f16_x4((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x4_t simde_vld1q_f32_x4(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { diff --git a/simde/arm/neon/ld2.h b/simde/arm/neon/ld2.h index 7ba934259..a304004f4 100644 --- a/simde/arm/neon/ld2.h +++ b/simde/arm/neon/ld2.h @@ -342,6 +342,33 @@ simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #define vld2_u64(a) simde_vld2_u64((a)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x2_t +simde_vld2_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld2_f16(ptr); + #else + simde_float16x4_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float16x4x2_t r = { { + simde_float16x4_from_private(r_[0]), + simde_float16x4_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_f16 + #define vld2_f16(a) simde_vld2_f16((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { diff --git a/simde/arm/neon/ld2_dup.h b/simde/arm/neon/ld2_dup.h new file mode 100644 index 000000000..8d30ce080 --- /dev/null +++ b/simde/arm/neon/ld2_dup.h @@ -0,0 +1,458 @@ +/* SPDX-License-Identifier: MIT + 
* + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_LD2_DUP_H) +#define SIMDE_ARM_NEON_LD2_DUP_H + +#include "dup_n.h" +#include "reinterpret.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x2_t +simde_vld2_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld2_dup_f16(ptr); + #else + simde_float16x4x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_f16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_f16 + #define vld2_dup_f16(a) simde_vld2_dup_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x2_t +simde_vld2_dup_f32(simde_float32 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_f32(ptr); + #else + simde_float32x2x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_f32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_f32 + #define vld2_dup_f32(a) simde_vld2_dup_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x2_t +simde_vld2_dup_f64(simde_float64 const * ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld2_dup_f64(ptr); + #else + simde_float64x1x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_f64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_f64 + #define vld2_dup_f64(a) simde_vld2_dup_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x2_t +simde_vld2_dup_s8(int8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_s8(ptr); + #else + simde_int8x8x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_s8(ptr[i]); + } + return r; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_s8 + #define vld2_dup_s8(a) simde_vld2_dup_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x2_t +simde_vld2_dup_s16(int16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_s16(ptr); + #else + simde_int16x4x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_s16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_s16 + #define vld2_dup_s16(a) simde_vld2_dup_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x2_t +simde_vld2_dup_s32(int32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_s32(ptr); + #else + simde_int32x2x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_s32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_s32 + #define vld2_dup_s32(a) simde_vld2_dup_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x2_t +simde_vld2_dup_s64(int64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_s64(ptr); + #else + simde_int64x1x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_s64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_s64 + #define vld2_dup_s64(a) simde_vld2_dup_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x2_t +simde_vld2_dup_u8(uint8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_u8(ptr); + #else + simde_uint8x8x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_u8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_u8 + #define vld2_dup_u8(a) simde_vld2_dup_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x2_t +simde_vld2_dup_u16(uint16_t const * ptr) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_u16(ptr); + #else + simde_uint16x4x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_u16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_u16 + #define vld2_dup_u16(a) simde_vld2_dup_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x2_t +simde_vld2_dup_u32(uint32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_u32(ptr); + #else + simde_uint32x2x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_u32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_u32 + #define vld2_dup_u32(a) simde_vld2_dup_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x2_t +simde_vld2_dup_u64(uint64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_dup_u64(ptr); + #else + simde_uint64x1x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdup_n_u64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_dup_u64 + #define vld2_dup_u64(a) simde_vld2_dup_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x2_t +simde_vld2q_dup_f16(simde_float16 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld2q_dup_f16(ptr); + #else + simde_float16x8x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_f16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_f16 + #define vld2q_dup_f16(a) simde_vld2q_dup_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x2_t +simde_vld2q_dup_f32(simde_float32 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_f32(ptr); + #else + simde_float32x4x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_f32(ptr[i]); + } + return 
r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_f32 + #define vld2q_dup_f32(a) simde_vld2q_dup_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x2_t +simde_vld2q_dup_f64(simde_float64 const * ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld2q_dup_f64(ptr); + #else + simde_float64x2x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_f64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_f64 + #define vld2q_dup_f64(a) simde_vld2q_dup_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x2_t +simde_vld2q_dup_s8(int8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_s8(ptr); + #else + simde_int8x16x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_s8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_s8 + #define vld2q_dup_s8(a) simde_vld2q_dup_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x2_t +simde_vld2q_dup_s16(int16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_s16(ptr); + #else + simde_int16x8x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_s16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_s16 + #define vld2q_dup_s16(a) simde_vld2q_dup_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x2_t +simde_vld2q_dup_s32(int32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_s32(ptr); + #else + simde_int32x4x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_s32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_s32 + #define vld2q_dup_s32(a) simde_vld2q_dup_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x2_t 
+simde_vld2q_dup_s64(int64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_s64(ptr); + #else + simde_int64x2x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_s64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_s64 + #define vld2q_dup_s64(a) simde_vld2q_dup_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x2_t +simde_vld2q_dup_u8(uint8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_u8(ptr); + #else + simde_uint8x16x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_u8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_u8 + #define vld2q_dup_u8(a) simde_vld2q_dup_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x2_t +simde_vld2q_dup_u16(uint16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_u16(ptr); + #else + simde_uint16x8x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_u16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_u16 + #define vld2q_dup_u16(a) simde_vld2q_dup_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x2_t +simde_vld2q_dup_u32(uint32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_u32(ptr); + #else + simde_uint32x4x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_u32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_u32 + #define vld2q_dup_u32(a) simde_vld2q_dup_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x2_t +simde_vld2q_dup_u64(uint64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld2q_dup_u64(ptr); + #else + simde_uint64x2x2_t r; + + for (size_t i = 0 ; i < 2 ; i++) { + r.val[i] = simde_vdupq_n_u64(ptr[i]); 
+ } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_dup_u64 + #define vld2q_dup_u64(a) simde_vld2q_dup_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD2_DUP_H) */ diff --git a/simde/arm/neon/ld2_lane.h b/simde/arm/neon/ld2_lane.h new file mode 100644 index 000000000..9c9adb6ef --- /dev/null +++ b/simde/arm/neon/ld2_lane.h @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_LD2_LANE_H)
+#define SIMDE_ARM_NEON_LD2_LANE_H
+
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* vld2_lane family: each function below returns a copy of `src` in which
+ * element `lane` of val[0] and val[1] has been replaced by ptr[0] and
+ * ptr[1]; all other elements are taken from `src` unchanged.  When the
+ * guarding *_NATIVE macro is defined, the function name is additionally
+ * #defined to forward straight to the compiler intrinsic. */
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x8x2_t simde_vld2_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int8x8x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_int8x8x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int8x8_private elems_ = simde_int8x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int8x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_s8(ptr, src, lane) vld2_lane_s8(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_s8
+  #define vld2_lane_s8(ptr, src, lane) simde_vld2_lane_s8((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x4x2_t simde_vld2_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int16x4x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int16x4x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int16x4_private elems_ = simde_int16x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int16x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_s16(ptr, src, lane) vld2_lane_s16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_s16
+  #define vld2_lane_s16(ptr, src, lane) simde_vld2_lane_s16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x2x2_t simde_vld2_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x2x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_int32x2x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int32x2_private elems_ = simde_int32x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int32x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_s32(ptr, src, lane) vld2_lane_s32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_s32
+  #define vld2_lane_s32(ptr, src, lane) simde_vld2_lane_s32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x1x2_t simde_vld2_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x1x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  simde_int64x1x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int64x1_private elems_ = simde_int64x1_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int64x1_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2_lane_s64(ptr, src, lane) vld2_lane_s64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_s64
+  #define vld2_lane_s64(ptr, src, lane) simde_vld2_lane_s64((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x8x2_t simde_vld2_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint8x8x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_uint8x8x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint8x8_private elems_ = simde_uint8x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint8x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_u8(ptr, src, lane) vld2_lane_u8(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_u8
+  #define vld2_lane_u8(ptr, src, lane) simde_vld2_lane_u8((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x4x2_t simde_vld2_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint16x4x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_uint16x4x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint16x4_private elems_ = simde_uint16x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint16x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_u16(ptr, src, lane) vld2_lane_u16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_u16
+  #define vld2_lane_u16(ptr, src, lane) simde_vld2_lane_u16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x2x2_t simde_vld2_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x2x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_uint32x2x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint32x2_private elems_ = simde_uint32x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint32x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_u32(ptr, src, lane) vld2_lane_u32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_u32
+  #define vld2_lane_u32(ptr, src, lane) simde_vld2_lane_u32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x1x2_t simde_vld2_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x1x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  simde_uint64x1x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint64x1_private elems_ = simde_uint64x1_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint64x1_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2_lane_u64(ptr, src, lane) vld2_lane_u64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_u64
+  #define vld2_lane_u64(ptr, src, lane) simde_vld2_lane_u64((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4x2_t simde_vld2_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_float16x4x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_float16x4x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_float16x4_private elems_ = simde_float16x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float16x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+  #define simde_vld2_lane_f16(ptr, src, lane) vld2_lane_f16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_f16
+  #define vld2_lane_f16(ptr, src, lane) simde_vld2_lane_f16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2x2_t simde_vld2_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float32x2x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_float32x2_private elems_ = simde_float32x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float32x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2_lane_f32(ptr, src, lane) vld2_lane_f32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_f32
+  #define vld2_lane_f32(ptr, src, lane) simde_vld2_lane_f32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x1x2_t simde_vld2_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x1x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  simde_float64x1x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_float64x1_private elems_ = simde_float64x1_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float64x1_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2_lane_f64(ptr, src, lane) vld2_lane_f64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2_lane_f64
+  #define vld2_lane_f64(ptr, src, lane) simde_vld2_lane_f64((ptr), (src), (lane))
+#endif
+
+/* vld2q_lane family (128-bit vectors): each function below returns a
+ * copy of `src` in which element `lane` of val[0] and val[1] has been
+ * replaced by ptr[0] and ptr[1]; all other elements come from `src`. */
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x16x2_t simde_vld2q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int8x16x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
+  simde_int8x16x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int8x16_private elems_ = simde_int8x16_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int8x16_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_s8(ptr, src, lane) vld2q_lane_s8(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_s8
+  #define vld2q_lane_s8(ptr, src, lane) simde_vld2q_lane_s8((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x8x2_t simde_vld2q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int16x8x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_int16x8x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int16x8_private elems_ = simde_int16x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int16x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_s16(ptr, src, lane) vld2q_lane_s16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_s16
+  #define vld2q_lane_s16(ptr, src, lane) simde_vld2q_lane_s16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4x2_t simde_vld2q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x4x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int32x4x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int32x4_private elems_ = simde_int32x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int32x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2q_lane_s32(ptr, src, lane) vld2q_lane_s32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_s32
+  #define vld2q_lane_s32(ptr, src, lane) simde_vld2q_lane_s32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2x2_t simde_vld2q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x2x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_int64x2x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_int64x2_private elems_ = simde_int64x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int64x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_s64(ptr, src, lane) vld2q_lane_s64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_s64
+  #define vld2q_lane_s64(ptr, src, lane) simde_vld2q_lane_s64((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x16x2_t simde_vld2q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint8x16x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) {
+  simde_uint8x16x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint8x16_private elems_ = simde_uint8x16_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint8x16_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_u8(ptr, src, lane) vld2q_lane_u8(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_u8
+  #define vld2q_lane_u8(ptr, src, lane) simde_vld2q_lane_u8((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x8x2_t simde_vld2q_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint16x8x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_uint16x8x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint16x8_private elems_ = simde_uint16x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint16x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_u16(ptr, src, lane) vld2q_lane_u16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_u16
+  #define vld2q_lane_u16(ptr, src, lane) simde_vld2q_lane_u16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4x2_t simde_vld2q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x4x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_uint32x4x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint32x4_private elems_ = simde_uint32x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint32x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2q_lane_u32(ptr, src, lane) vld2q_lane_u32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_u32
+  #define vld2q_lane_u32(ptr, src, lane) simde_vld2q_lane_u32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2x2_t simde_vld2q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_uint64x2x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_uint64x2_private elems_ = simde_uint64x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint64x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_u64(ptr, src, lane) vld2q_lane_u64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_u64
+  #define vld2q_lane_u64(ptr, src, lane) simde_vld2q_lane_u64((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8x2_t simde_vld2q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_float16x8x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_float16x8x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_float16x8_private elems_ = simde_float16x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float16x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+  #define simde_vld2q_lane_f16(ptr, src, lane) vld2q_lane_f16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_f16
+  #define vld2q_lane_f16(ptr, src, lane) simde_vld2q_lane_f16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4x2_t simde_vld2q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x4x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_float32x4x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_float32x4_private elems_ = simde_float32x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float32x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld2q_lane_f32(ptr, src, lane) vld2q_lane_f32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_f32
+  #define vld2q_lane_f32(ptr, src, lane) simde_vld2q_lane_f32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x2x2_t simde_vld2q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x2x2_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float64x2x2_t result;
+
+  for (size_t n = 0 ; n < 2 ; n++) {
+    simde_float64x2_private elems_ = simde_float64x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float64x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld2q_lane_f64(ptr, src, lane) vld2q_lane_f64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld2q_lane_f64
+  #define vld2q_lane_f64(ptr, src, lane) simde_vld2q_lane_f64((ptr), (src), (lane))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_LD2_LANE_H) */
diff --git a/simde/arm/neon/ld3.h b/simde/arm/neon/ld3.h
index e13eff1db..6ab02f321 100644
--- a/simde/arm/neon/ld3.h
+++ b/simde/arm/neon/ld3.h
@@ -23,6 +23,7 @@
  * Copyright:
  *   2020      Evan Nemerson
  *   2020      Sean Maher
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
  */
 
 #if !defined(SIMDE_ARM_NEON_LD3_H)
@@ -40,6 +41,34 @@ SIMDE_BEGIN_DECLS_
 
 #if !defined(SIMDE_BUG_INTEL_857088)
 
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4x3_t
+simde_vld3_f16(simde_float16 const *ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vld3_f16(ptr);
+  #else
+    simde_float16x4_private planes_[3];
+    /* De-interleave: element e of vector v is read from ptr[v + 3 * e]. */
+    for (size_t v = 0; v < (sizeof(planes_) / sizeof(planes_[0])); v++) {
+      for (size_t e = 0 ; e < (sizeof(planes_[0].values) / sizeof(planes_[0].values[0])) ; e++) {
+        planes_[v].values[e] = ptr[(e * (sizeof(planes_) / sizeof(planes_[0]))) + v];
+      }
+    }
+
+    simde_float16x4x3_t result = { {
+      simde_float16x4_from_private(planes_[0]),
+      simde_float16x4_from_private(planes_[1]),
+      simde_float16x4_from_private(planes_[2])
+    } };
+
+    return result;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_f16
+  #define vld3_f16(a) simde_vld3_f16((a))
+#endif
+
 SIMDE_FUNCTION_ATTRIBUTES
 simde_float32x2x3_t
 simde_vld3_f32(simde_float32 const *ptr) {
diff --git a/simde/arm/neon/ld3_dup.h b/simde/arm/neon/ld3_dup.h
new file mode 100644
index 000000000..49507d69c
--- /dev/null
+++ b/simde/arm/neon/ld3_dup.h
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_LD3_DUP_H)
+#define SIMDE_ARM_NEON_LD3_DUP_H
+
+#include "dup_n.h"
+#include "reinterpret.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* vld3_dup family: each function below reads ptr[0], ptr[1] and ptr[2]
+ * and returns three vectors, where val[i] is simde_vdup{q}_n_*(ptr[i]).
+ * The compiler intrinsic is called instead of the portable loop when the
+ * guarding *_NATIVE macro is defined. */
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4x3_t
+simde_vld3_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(3)]) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vld3_dup_f16(ptr);
+  #else
+    simde_float16x4x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_f16(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_f16
+  #define vld3_dup_f16(a) simde_vld3_dup_f16((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2x3_t
+simde_vld3_dup_f32(simde_float32 const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_f32(ptr);
+  #else
+    simde_float32x2x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_f32(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_f32
+  #define vld3_dup_f32(a) simde_vld3_dup_f32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x1x3_t
+simde_vld3_dup_f64(simde_float64 const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vld3_dup_f64(ptr);
+  #else
+    simde_float64x1x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_f64(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_f64
+  #define vld3_dup_f64(a) simde_vld3_dup_f64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x8x3_t
+simde_vld3_dup_s8(int8_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_s8(ptr);
+  #else
+    simde_int8x8x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_s8(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_s8
+  #define vld3_dup_s8(a) simde_vld3_dup_s8((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x4x3_t
+simde_vld3_dup_s16(int16_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_s16(ptr);
+  #else
+    simde_int16x4x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_s16(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_s16
+  #define vld3_dup_s16(a) simde_vld3_dup_s16((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x2x3_t
+simde_vld3_dup_s32(int32_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_s32(ptr);
+  #else
+    simde_int32x2x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_s32(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_s32
+  #define vld3_dup_s32(a) simde_vld3_dup_s32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x1x3_t
+simde_vld3_dup_s64(int64_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_s64(ptr);
+  #else
+    simde_int64x1x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_s64(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_s64
+  #define vld3_dup_s64(a) simde_vld3_dup_s64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x8x3_t
+simde_vld3_dup_u8(uint8_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_u8(ptr);
+  #else
+    simde_uint8x8x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_u8(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_u8
+  #define vld3_dup_u8(a) simde_vld3_dup_u8((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x4x3_t
+simde_vld3_dup_u16(uint16_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_u16(ptr);
+  #else
+    simde_uint16x4x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_u16(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_u16
+  #define vld3_dup_u16(a) simde_vld3_dup_u16((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x2x3_t
+simde_vld3_dup_u32(uint32_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_u32(ptr);
+  #else
+    simde_uint32x2x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_u32(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_u32
+  #define vld3_dup_u32(a) simde_vld3_dup_u32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x1x3_t
+simde_vld3_dup_u64(uint64_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vld3_dup_u64(ptr);
+  #else
+    simde_uint64x1x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdup_n_u64(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_dup_u64
+  #define vld3_dup_u64(a) simde_vld3_dup_u64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8x3_t
+simde_vld3q_dup_f16(simde_float16 const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vld3q_dup_f16(ptr);
+  #else
+    simde_float16x8x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_f16(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_f16
+  #define vld3q_dup_f16(a) simde_vld3q_dup_f16((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4x3_t
+simde_vld3q_dup_f32(simde_float32 const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_f32(ptr);
+  #else
+    simde_float32x4x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_f32(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_f32
+  #define vld3q_dup_f32(a) simde_vld3q_dup_f32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x2x3_t
+simde_vld3q_dup_f64(simde_float64 const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vld3q_dup_f64(ptr);
+  #else
+    simde_float64x2x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_f64(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_f64
+  #define vld3q_dup_f64(a) simde_vld3q_dup_f64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x16x3_t
+simde_vld3q_dup_s8(int8_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_s8(ptr);
+  #else
+    simde_int8x16x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_s8(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_s8
+  #define vld3q_dup_s8(a) simde_vld3q_dup_s8((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x8x3_t
+simde_vld3q_dup_s16(int16_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_s16(ptr);
+  #else
+    simde_int16x8x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_s16(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_s16
+  #define vld3q_dup_s16(a) simde_vld3q_dup_s16((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4x3_t
+simde_vld3q_dup_s32(int32_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_s32(ptr);
+  #else
+    simde_int32x4x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_s32(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_s32
+  #define vld3q_dup_s32(a) simde_vld3q_dup_s32((a))
+#endif
+
+/* ACLE lists the 128-bit, 64-bit-element vld3q_dup forms as A64-only, so
+ * (like the f64 variant above) the native call is gated on A64 rather
+ * than A32v8; every other target takes the portable path. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2x3_t
+simde_vld3q_dup_s64(int64_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vld3q_dup_s64(ptr);
+  #else
+    simde_int64x2x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_s64(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_s64
+  #define vld3q_dup_s64(a) simde_vld3q_dup_s64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x16x3_t
+simde_vld3q_dup_u8(uint8_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_u8(ptr);
+  #else
+    simde_uint8x16x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_u8(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_u8
+  #define vld3q_dup_u8(a) simde_vld3q_dup_u8((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x8x3_t
+simde_vld3q_dup_u16(uint16_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_u16(ptr);
+  #else
+    simde_uint16x8x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_u16(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_u16
+  #define vld3q_dup_u16(a) simde_vld3q_dup_u16((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4x3_t
+simde_vld3q_dup_u32(uint32_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    return vld3q_dup_u32(ptr);
+  #else
+    simde_uint32x4x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_u32(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_u32
+  #define vld3q_dup_u32(a) simde_vld3q_dup_u32((a))
+#endif
+
+/* A64-only per ACLE, for the same reason as simde_vld3q_dup_s64. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2x3_t
+simde_vld3q_dup_u64(uint64_t const * ptr) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vld3q_dup_u64(ptr);
+  #else
+    simde_uint64x2x3_t r;
+
+    for (size_t i = 0 ; i < 3 ; i++) {
+      r.val[i] = simde_vdupq_n_u64(ptr[i]);
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3q_dup_u64
+  #define vld3q_dup_u64(a) simde_vld3q_dup_u64((a))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_LD3_DUP_H) */
diff --git a/simde/arm/neon/ld3_lane.h b/simde/arm/neon/ld3_lane.h
new file mode 100644
index 000000000..87f803eb6
--- /dev/null
+++ b/simde/arm/neon/ld3_lane.h
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_LD3_LANE_H)
+#define SIMDE_ARM_NEON_LD3_LANE_H
+
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* vld3_lane family: each function below returns a copy of `src` in which
+ * element `lane` of val[0], val[1] and val[2] has been replaced by
+ * ptr[0], ptr[1] and ptr[2]; all other elements come from `src`.  When
+ * the guarding *_NATIVE macro is defined, the function name is
+ * additionally #defined to forward straight to the compiler intrinsic. */
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x8x3_t simde_vld3_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int8x8x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_int8x8x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_int8x8_private elems_ = simde_int8x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int8x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_s8(ptr, src, lane) vld3_lane_s8(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_s8
+  #define vld3_lane_s8(ptr, src, lane) simde_vld3_lane_s8((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x4x3_t simde_vld3_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int16x4x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int16x4x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_int16x4_private elems_ = simde_int16x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int16x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_s16(ptr, src, lane) vld3_lane_s16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_s16
+  #define vld3_lane_s16(ptr, src, lane) simde_vld3_lane_s16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x2x3_t simde_vld3_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int32x2x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_int32x2x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_int32x2_private elems_ = simde_int32x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int32x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_s32(ptr, src, lane) vld3_lane_s32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_s32
+  #define vld3_lane_s32(ptr, src, lane) simde_vld3_lane_s32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x1x3_t simde_vld3_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x1x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  simde_int64x1x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_int64x1_private elems_ = simde_int64x1_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_int64x1_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld3_lane_s64(ptr, src, lane) vld3_lane_s64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_s64
+  #define vld3_lane_s64(ptr, src, lane) simde_vld3_lane_s64((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x8x3_t simde_vld3_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint8x8x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_uint8x8x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_uint8x8_private elems_ = simde_uint8x8_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint8x8_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_u8(ptr, src, lane) vld3_lane_u8(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_u8
+  #define vld3_lane_u8(ptr, src, lane) simde_vld3_lane_u8((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x4x3_t simde_vld3_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint16x4x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_uint16x4x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_uint16x4_private elems_ = simde_uint16x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint16x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_u16(ptr, src, lane) vld3_lane_u16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_u16
+  #define vld3_lane_u16(ptr, src, lane) simde_vld3_lane_u16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x2x3_t simde_vld3_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint32x2x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_uint32x2x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_uint32x2_private elems_ = simde_uint32x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint32x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_u32(ptr, src, lane) vld3_lane_u32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_u32
+  #define vld3_lane_u32(ptr, src, lane) simde_vld3_lane_u32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x1x3_t simde_vld3_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  simde_uint64x1x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_uint64x1_private elems_ = simde_uint64x1_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_uint64x1_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vld3_lane_u64(ptr, src, lane) vld3_lane_u64(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_u64
+  #define vld3_lane_u64(ptr, src, lane) simde_vld3_lane_u64((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4x3_t simde_vld3_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float16x4x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_float16x4x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_float16x4_private elems_ = simde_float16x4_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float16x4_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+  #define simde_vld3_lane_f16(ptr, src, lane) vld3_lane_f16(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_f16
+  #define vld3_lane_f16(ptr, src, lane) simde_vld3_lane_f16((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2x3_t simde_vld3_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x2x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float32x2x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_float32x2_private elems_ = simde_float32x2_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float32x2_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vld3_lane_f32(ptr, src, lane) vld3_lane_f32(ptr, src, lane)
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld3_lane_f32
+  #define vld3_lane_f32(ptr, src, lane) simde_vld3_lane_f32((ptr), (src), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x1x3_t simde_vld3_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t src, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  simde_float64x1x3_t result;
+
+  for (size_t n = 0 ; n < 3 ; n++) {
+    simde_float64x1_private elems_ = simde_float64x1_to_private(src.val[n]);
+    elems_.values[lane] = ptr[n];
+    result.val[n] = simde_float64x1_from_private(elems_);
+  }
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3_lane_f64(ptr, src, lane) vld3_lane_f64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3_lane_f64 + #define vld3_lane_f64(ptr, src, lane) simde_vld3_lane_f64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x3_t simde_vld3q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int8x16x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_int8x16x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_int8x16_private tmp_ = simde_int8x16_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int8x16_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_s8(ptr, src, lane) vld3q_lane_s8(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_s8 + #define vld3q_lane_s8(ptr, src, lane) simde_vld3q_lane_s8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x3_t simde_vld3q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int16x8x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_int16x8_private tmp_ = simde_int16x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int16x8_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_s16(ptr, src, lane) vld3q_lane_s16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_s16 + #define vld3q_lane_s16(ptr, src, lane) simde_vld3q_lane_s16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x3_t simde_vld3q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int32x4x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + 
simde_int32x4_private tmp_ = simde_int32x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int32x4_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld3q_lane_s32(ptr, src, lane) vld3q_lane_s32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_s32 + #define vld3q_lane_s32(ptr, src, lane) simde_vld3q_lane_s32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x3_t simde_vld3q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x2x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int64x2x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_int64x2_private tmp_ = simde_int64x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int64x2_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_s64(ptr, src, lane) vld3q_lane_s64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_s64 + #define vld3q_lane_s64(ptr, src, lane) simde_vld3q_lane_s64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x3_t simde_vld3q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint8x16x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_uint8x16x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_uint8x16_private tmp_ = simde_uint8x16_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint8x16_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_u8(ptr, src, lane) vld3q_lane_u8(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_u8 + #define vld3q_lane_u8(ptr, src, lane) simde_vld3q_lane_u8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x3_t simde_vld3q_lane_u16(uint16_t const 
ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint16x8x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x8x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_uint16x8_private tmp_ = simde_uint16x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint16x8_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_u16(ptr, src, lane) vld3q_lane_u16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_u16 + #define vld3q_lane_u16(ptr, src, lane) simde_vld3q_lane_u16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x3_t simde_vld3q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint32x4x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_uint32x4_private tmp_ = simde_uint32x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint32x4_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld3q_lane_u32(ptr, src, lane) vld3q_lane_u32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_u32 + #define vld3q_lane_u32(ptr, src, lane) simde_vld3q_lane_u32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x3_t simde_vld3q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x2x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint64x2x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_uint64x2_private tmp_ = simde_uint64x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint64x2_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_u64(ptr, src, lane) vld3q_lane_u64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef 
vld3q_lane_u64 + #define vld3q_lane_u64(ptr, src, lane) simde_vld3q_lane_u64((ptr), (src), (lane)) +#endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x3_t simde_vld3q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float16x8x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_float16x8x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_float16x8_private tmp_ = simde_float16x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float16x8_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vld3q_lane_f16(ptr, src, lane) vld3q_lane_f16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_f16 + #define vld3q_lane_f16(ptr, src, lane) simde_vld3q_lane_f16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x3_t simde_vld3q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x4x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x4x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_float32x4_private tmp_ = simde_float32x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float32x4_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld3q_lane_f32(ptr, src, lane) vld3q_lane_f32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_f32 + #define vld3q_lane_f32(ptr, src, lane) simde_vld3q_lane_f32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x3_t simde_vld3q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x2x3_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x2x3_t r; + + for (size_t i = 0 ; i < 3 ; i++) { + simde_float64x2_private tmp_ = simde_float64x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + 
r.val[i] = simde_float64x2_from_private(tmp_); + } + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld3q_lane_f64(ptr, src, lane) vld3q_lane_f64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_lane_f64 + #define vld3q_lane_f64(ptr, src, lane) simde_vld3q_lane_f64((ptr), (src), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD3_LANE_H) */ diff --git a/simde/arm/neon/ld4.h b/simde/arm/neon/ld4.h index b93618248..4eb2f3a47 100644 --- a/simde/arm/neon/ld4.h +++ b/simde/arm/neon/ld4.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_LD4_H) @@ -39,6 +40,26 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x4_t +simde_vld4_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld4_f16(ptr); + #else + simde_float16x4_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_float16x4_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_float16x4x4_t s_ = { { simde_float16x4_from_private(a_[0]), simde_float16x4_from_private(a_[1]), + simde_float16x4_from_private(a_[2]), simde_float16x4_from_private(a_[3]) } }; + return (s_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_f16 + #define vld4_f16(a) simde_vld4_f16((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x4_t simde_vld4_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { diff --git a/simde/arm/neon/ld4_dup.h b/simde/arm/neon/ld4_dup.h new file mode 100644 index 000000000..5e297a35f --- /dev/null +++ b/simde/arm/neon/ld4_dup.h @@ -0,0 +1,458 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and 
associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_LD4_DUP_H) +#define SIMDE_ARM_NEON_LD4_DUP_H + +#include "dup_n.h" +#include "reinterpret.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x4_t +simde_vld4_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld4_dup_f16(ptr); + #else + simde_float16x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_f16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_f16 + #define vld4_dup_f16(a) simde_vld4_dup_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x4_t +simde_vld4_dup_f32(simde_float32 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_f32(ptr); + #else + simde_float32x2x4_t r; + + for 
(size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_f32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_f32 + #define vld4_dup_f32(a) simde_vld4_dup_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x4_t +simde_vld4_dup_f64(simde_float64 const * ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld4_dup_f64(ptr); + #else + simde_float64x1x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_f64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_f64 + #define vld4_dup_f64(a) simde_vld4_dup_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x4_t +simde_vld4_dup_s8(int8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_s8(ptr); + #else + simde_int8x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_s8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_s8 + #define vld4_dup_s8(a) simde_vld4_dup_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x4_t +simde_vld4_dup_s16(int16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_s16(ptr); + #else + simde_int16x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_s16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_s16 + #define vld4_dup_s16(a) simde_vld4_dup_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x4_t +simde_vld4_dup_s32(int32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_s32(ptr); + #else + simde_int32x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_s32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_s32 + #define vld4_dup_s32(a) simde_vld4_dup_s32((a)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x4_t +simde_vld4_dup_s64(int64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_s64(ptr); + #else + simde_int64x1x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_s64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_s64 + #define vld4_dup_s64(a) simde_vld4_dup_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x4_t +simde_vld4_dup_u8(uint8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_u8(ptr); + #else + simde_uint8x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_u8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_u8 + #define vld4_dup_u8(a) simde_vld4_dup_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x4_t +simde_vld4_dup_u16(uint16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_u16(ptr); + #else + simde_uint16x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_u16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_u16 + #define vld4_dup_u16(a) simde_vld4_dup_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x4_t +simde_vld4_dup_u32(uint32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_u32(ptr); + #else + simde_uint32x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdup_n_u32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_u32 + #define vld4_dup_u32(a) simde_vld4_dup_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x4_t +simde_vld4_dup_u64(uint64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_dup_u64(ptr); + #else + simde_uint64x1x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = 
simde_vdup_n_u64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_dup_u64 + #define vld4_dup_u64(a) simde_vld4_dup_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x4_t +simde_vld4q_dup_f16(simde_float16 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld4q_dup_f16(ptr); + #else + simde_float16x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_f16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_f16 + #define vld4q_dup_f16(a) simde_vld4q_dup_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x4_t +simde_vld4q_dup_f32(simde_float32 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_f32(ptr); + #else + simde_float32x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_f32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_f32 + #define vld4q_dup_f32(a) simde_vld4q_dup_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x4_t +simde_vld4q_dup_f64(simde_float64 const * ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld4q_dup_f64(ptr); + #else + simde_float64x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_f64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_f64 + #define vld4q_dup_f64(a) simde_vld4q_dup_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x4_t +simde_vld4q_dup_s8(int8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_s8(ptr); + #else + simde_int8x16x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_s8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_s8 + #define vld4q_dup_s8(a) 
simde_vld4q_dup_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x4_t +simde_vld4q_dup_s16(int16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_s16(ptr); + #else + simde_int16x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_s16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_s16 + #define vld4q_dup_s16(a) simde_vld4q_dup_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x4_t +simde_vld4q_dup_s32(int32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_s32(ptr); + #else + simde_int32x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_s32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_s32 + #define vld4q_dup_s32(a) simde_vld4q_dup_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x4_t +simde_vld4q_dup_s64(int64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_s64(ptr); + #else + simde_int64x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_s64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_s64 + #define vld4q_dup_s64(a) simde_vld4q_dup_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x4_t +simde_vld4q_dup_u8(uint8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_u8(ptr); + #else + simde_uint8x16x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_u8(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_u8 + #define vld4q_dup_u8(a) simde_vld4q_dup_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x4_t +simde_vld4q_dup_u16(uint16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_u16(ptr); + #else + simde_uint16x8x4_t r; + + 
for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_u16(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_u16 + #define vld4q_dup_u16(a) simde_vld4q_dup_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x4_t +simde_vld4q_dup_u32(uint32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_u32(ptr); + #else + simde_uint32x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_u32(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_u32 + #define vld4q_dup_u32(a) simde_vld4q_dup_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x4_t +simde_vld4q_dup_u64(uint64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vld4q_dup_u64(ptr); + #else + simde_uint64x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + r.val[i] = simde_vdupq_n_u64(ptr[i]); + } + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_dup_u64 + #define vld4q_dup_u64(a) simde_vld4q_dup_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD4_DUP_H) */ diff --git a/simde/arm/neon/ld4_lane.h b/simde/arm/neon/ld4_lane.h index c525755d2..b3424c7d7 100644 --- a/simde/arm/neon/ld4_lane.h +++ b/simde/arm/neon/ld4_lane.h @@ -23,6 +23,7 @@ * Copyright: * 2021 Zhi An Ng (Copyright owned by Google, LLC) * 2021 Evan Nemerson + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ /* In older versions of clang, __builtin_neon_vld4_lane_v would @@ -99,6 +100,7 @@ simde_vld4_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4x4_t #define vld4_lane_s16(ptr, src, lane) simde_vld4_lane_s16((ptr), (src), (lane)) #endif + SIMDE_FUNCTION_ATTRIBUTES simde_int32x2x4_t simde_vld4_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x4_t src, const int lane) @@ -261,6 +263,33 @@ 
simde_vld4_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_ #define vld4_lane_u64(ptr, src, lane) simde_vld4_lane_u64((ptr), (src), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x4_t +simde_vld4_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x4x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_float16x4_private tmp_ = simde_float16x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float16x4_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_f16(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f16(ptr, src, lane)) + #else + #define simde_vld4_lane_f16(ptr, src, lane) vld4_lane_f16(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_f16 + #define vld4_lane_f16(ptr, src, lane) simde_vld4_lane_f16((ptr), (src), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x4_t simde_vld4_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t src, const int lane) @@ -531,6 +560,33 @@ simde_vld4q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4 #define vld4q_lane_u64(ptr, src, lane) simde_vld4q_lane_u64((ptr), (src), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x4_t +simde_vld4q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x8x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_float16x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_float16x8_private tmp_ = simde_float16x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float16x8_from_private(tmp_); + } + + return r; +} +#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_f16(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f16(ptr, src, lane)) + #else + #define simde_vld4q_lane_f16(ptr, src, lane) vld4q_lane_f16(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_f16 + #define vld4q_lane_f16(ptr, src, lane) simde_vld4q_lane_f16((ptr), (src), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x4_t simde_vld4q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t src, const int lane) diff --git a/simde/arm/neon/mla_lane.h b/simde/arm/neon/mla_lane.h index d7b41322f..755e65407 100644 --- a/simde/arm/neon/mla_lane.h +++ b/simde/arm/neon/mla_lane.h @@ -22,6 +22,7 @@ * * Copyright: * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_MLA_LANE_H) @@ -44,6 +45,28 @@ SIMDE_BEGIN_DECLS_ #define vmla_lane_f32(a, b, v, lane) simde_vmla_lane_f32((a), (b), (v), (lane)) #endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmla_laneq_f32(a, b, v, lane) vmla_laneq_f32((a), (b), (v), (lane)) +#else + #define simde_vmla_laneq_f32(a, b, v, lane) simde_vmla_f32((a), (b), simde_vdup_laneq_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmla_laneq_f32 + #define vmla_laneq_f32(a, b, v, lane) simde_vmla_laneq_f32((a), (b), (v), (lane)) +#endif + +/* Eric: Skip this function since it will trigger a compiler error when using i686-linux-gnu-g++-11. 
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlaq_laneq_f32(a, b, v, lane) vmlaq_laneq_f32((a), (b), (v), (lane)) +#else + #define simde_vmlaq_laneq_f32(a, b, v, lane) simde_vmlaq_f32((a), (b), simde_vdupq_laneq_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlaq_laneq_f32 + #define vmlaq_laneq_f32(a, b, v, lane) simde_vmlaq_laneq_f32((a), (b), (v), (lane)) +#endif +*/ + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmla_lane_s16(a, b, v, lane) vmla_lane_s16((a), (b), (v), (lane)) #else @@ -54,6 +77,26 @@ SIMDE_BEGIN_DECLS_ #define vmla_lane_s16(a, b, v, lane) simde_vmla_lane_s16((a), (b), (v), (lane)) #endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmla_laneq_s16(a, b, v, lane) vmla_laneq_s16((a), (b), (v), (lane)) +#else + #define simde_vmla_laneq_s16(a, b, v, lane) simde_vmla_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmla_laneq_s16 + #define vmla_laneq_s16(a, b, v, lane) simde_vmla_laneq_s16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlaq_laneq_s16(a, b, v, lane) vmlaq_laneq_s16((a), (b), (v), (lane)) +#else + #define simde_vmlaq_laneq_s16(a, b, v, lane) simde_vmlaq_s16((a), (b), simde_vdupq_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlaq_laneq_s16 + #define vmlaq_laneq_s16(a, b, v, lane) simde_vmlaq_laneq_s16((a), (b), (v), (lane)) +#endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmla_lane_s32(a, b, v, lane) vmla_lane_s32((a), (b), (v), (lane)) #else @@ -64,6 +107,26 @@ SIMDE_BEGIN_DECLS_ #define vmla_lane_s32(a, b, v, lane) simde_vmla_lane_s32((a), (b), (v), (lane)) #endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmla_laneq_s32(a, b, v, lane) vmla_laneq_s32((a), (b), (v), (lane)) +#else + #define simde_vmla_laneq_s32(a, b, v, lane) simde_vmla_s32((a), (b), 
simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmla_laneq_s32 + #define vmla_laneq_s32(a, b, v, lane) simde_vmla_laneq_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlaq_laneq_s32(a, b, v, lane) vmlaq_laneq_s32((a), (b), (v), (lane)) +#else /* portable path: broadcast the selected lane of v and call the non-lane form */ + #define simde_vmlaq_laneq_s32(a, b, v, lane) simde_vmlaq_s32((a), (b), simde_vdupq_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlaq_laneq_s32 + #define vmlaq_laneq_s32(a, b, v, lane) simde_vmlaq_laneq_s32((a), (b), (v), (lane)) +#endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmla_lane_u16(a, b, v, lane) vmla_lane_u16((a), (b), (v), (lane)) #else @@ -74,6 +137,26 @@ SIMDE_BEGIN_DECLS_ #define vmla_lane_u16(a, b, v, lane) simde_vmla_lane_u16((a), (b), (v), (lane)) #endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmla_laneq_u16(a, b, v, lane) vmla_laneq_u16((a), (b), (v), (lane)) +#else /* portable path: broadcast the selected lane of v and call the non-lane form */ + #define simde_vmla_laneq_u16(a, b, v, lane) simde_vmla_u16((a), (b), simde_vdup_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmla_laneq_u16 + #define vmla_laneq_u16(a, b, v, lane) simde_vmla_laneq_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlaq_laneq_u16(a, b, v, lane) vmlaq_laneq_u16((a), (b), (v), (lane)) +#else /* portable path: broadcast the selected lane of v and call the non-lane form */ + #define simde_vmlaq_laneq_u16(a, b, v, lane) simde_vmlaq_u16((a), (b), simde_vdupq_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlaq_laneq_u16 + #define vmlaq_laneq_u16(a, b, v, lane) simde_vmlaq_laneq_u16((a), (b), (v), (lane)) +#endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmla_lane_u32(a, b, v, lane) vmla_lane_u32((a), (b), (v), (lane)) #else @@ -84,6 +167,26 @@ SIMDE_BEGIN_DECLS_ #define vmla_lane_u32(a, b, v, lane) simde_vmla_lane_u32((a), (b), (v), (lane)) 
#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmla_laneq_u32(a, b, v, lane) vmla_laneq_u32((a), (b), (v), (lane)) +#else /* portable path: broadcast the selected lane of v and call the non-lane form */ + #define simde_vmla_laneq_u32(a, b, v, lane) simde_vmla_u32((a), (b), simde_vdup_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmla_laneq_u32 + #define vmla_laneq_u32(a, b, v, lane) simde_vmla_laneq_u32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlaq_laneq_u32(a, b, v, lane) vmlaq_laneq_u32((a), (b), (v), (lane)) +#else /* portable path: broadcast the selected lane of v and call the non-lane form */ + #define simde_vmlaq_laneq_u32(a, b, v, lane) simde_vmlaq_u32((a), (b), simde_vdupq_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlaq_laneq_u32 + #define vmlaq_laneq_u32(a, b, v, lane) simde_vmlaq_laneq_u32((a), (b), (v), (lane)) +#endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmlaq_lane_f32(a, b, v, lane) vmlaq_lane_f32((a), (b), (v), (lane)) #else diff --git a/simde/arm/neon/mlal_high_lane.h b/simde/arm/neon/mlal_high_lane.h new file mode 100644 index 000000000..50018a95d --- /dev/null +++ b/simde/arm/neon/mlal_high_lane.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_LANE_H)
+#define SIMDE_ARM_NEON_MLAL_HIGH_LANE_H /* vmlal_high_lane{q}_{s16,s32,u16,u32}: lane-selecting wrappers around simde_vmlal_high_* */
+
+#include "movl_high.h"
+#include "mlal_high.h"
+#include "dup_n.h"
+#include "mla.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_lane_s16(a, b, v, lane) vmlal_high_lane_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_lane_s16
+  #define vmlal_high_lane_s16(a, b, v, lane) simde_vmlal_high_lane_s16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_laneq_s16(a, b, v, lane) vmlal_high_laneq_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_laneq_s16
+  #define vmlal_high_laneq_s16(a, b, v, lane) simde_vmlal_high_laneq_s16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_lane_s32(a, b, v, lane) vmlal_high_lane_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_lane_s32
+  #define vmlal_high_lane_s32(a, b, v, lane) simde_vmlal_high_lane_s32((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_laneq_s32(a, b, v, lane) vmlal_high_laneq_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_laneq_s32
+  #define vmlal_high_laneq_s32(a, b, v, lane) simde_vmlal_high_laneq_s32((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4_t
+simde_vmlal_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_lane_u16(a, b, v, lane) vmlal_high_lane_u16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_lane_u16
+  #define vmlal_high_lane_u16(a, b, v, lane) simde_vmlal_high_lane_u16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4_t
+simde_vmlal_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x8_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_laneq_u16(a, b, v, lane) vmlal_high_laneq_u16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_laneq_u16
+  #define vmlal_high_laneq_u16(a, b, v, lane) simde_vmlal_high_laneq_u16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2_t
+simde_vmlal_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x2_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_lane_u32(a, b, v, lane) vmlal_high_lane_u32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_lane_u32
+  #define vmlal_high_lane_u32(a, b, v, lane) simde_vmlal_high_lane_u32((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2_t
+simde_vmlal_high_laneq_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlal_high_laneq_u32(a, b, v, lane) vmlal_high_laneq_u32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlal_high_laneq_u32
+  #define vmlal_high_laneq_u32(a, b, v, lane) simde_vmlal_high_laneq_u32((a), (b), (v), (lane))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_LANE_H) */
diff --git a/simde/arm/neon/mls_lane.h
b/simde/arm/neon/mls_lane.h new file mode 100644 index 000000000..35855a2b7 --- /dev/null +++ b/simde/arm/neon/mls_lane.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_MLS_LANE_H) +#define SIMDE_ARM_NEON_MLS_LANE_H + +#include "mls.h" +#include "dup_lane.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ /* Pattern for every intrinsic below: use the native intrinsic when the target provides it, otherwise apply the non-lane simde_vmls{q}_* to the lane broadcast by simde_vdup{q}_lane{q}_*; the unprefixed name is redefined only when native aliases are enabled. */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmls_lane_f32(a, b, v, lane) vmls_lane_f32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_lane_f32(a, b, v, lane) simde_vmls_f32((a), (b), simde_vdup_lane_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_lane_f32 + #define vmls_lane_f32(a, b, v, lane) simde_vmls_lane_f32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmls_laneq_f32(a, b, v, lane) vmls_laneq_f32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_laneq_f32(a, b, v, lane) simde_vmls_f32((a), (b), simde_vdup_laneq_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmls_laneq_f32 + #define vmls_laneq_f32(a, b, v, lane) simde_vmls_laneq_f32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsq_laneq_f32(a, b, v, lane) vmlsq_laneq_f32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_laneq_f32(a, b, v, lane) simde_vmlsq_f32((a), (b), simde_vdupq_laneq_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsq_laneq_f32 + #define vmlsq_laneq_f32(a, b, v, lane) simde_vmlsq_laneq_f32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmls_lane_s16(a, b, v, lane) vmls_lane_s16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_lane_s16(a, b, v, lane) simde_vmls_s16((a), (b), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_lane_s16 + #define vmls_lane_s16(a, b, v, lane) simde_vmls_lane_s16((a), (b), (v), (lane)) +#endif + +#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmls_laneq_s16(a, b, v, lane) vmls_laneq_s16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_laneq_s16(a, b, v, lane) simde_vmls_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmls_laneq_s16 + #define vmls_laneq_s16(a, b, v, lane) simde_vmls_laneq_s16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsq_laneq_s16(a, b, v, lane) vmlsq_laneq_s16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_laneq_s16(a, b, v, lane) simde_vmlsq_s16((a), (b), simde_vdupq_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsq_laneq_s16 + #define vmlsq_laneq_s16(a, b, v, lane) simde_vmlsq_laneq_s16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmls_lane_s32(a, b, v, lane) vmls_lane_s32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_lane_s32(a, b, v, lane) simde_vmls_s32((a), (b), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_lane_s32 + #define vmls_lane_s32(a, b, v, lane) simde_vmls_lane_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmls_laneq_s32(a, b, v, lane) vmls_laneq_s32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_laneq_s32(a, b, v, lane) simde_vmls_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmls_laneq_s32 + #define vmls_laneq_s32(a, b, v, lane) simde_vmls_laneq_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsq_laneq_s32(a, b, v, lane) vmlsq_laneq_s32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_laneq_s32(a, b, v, lane) simde_vmlsq_s32((a), (b), simde_vdupq_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + 
#undef vmlsq_laneq_s32 + #define vmlsq_laneq_s32(a, b, v, lane) simde_vmlsq_laneq_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmls_lane_u16(a, b, v, lane) vmls_lane_u16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_lane_u16(a, b, v, lane) simde_vmls_u16((a), (b), simde_vdup_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_lane_u16 + #define vmls_lane_u16(a, b, v, lane) simde_vmls_lane_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmls_laneq_u16(a, b, v, lane) vmls_laneq_u16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_laneq_u16(a, b, v, lane) simde_vmls_u16((a), (b), simde_vdup_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmls_laneq_u16 + #define vmls_laneq_u16(a, b, v, lane) simde_vmls_laneq_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsq_laneq_u16(a, b, v, lane) vmlsq_laneq_u16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_laneq_u16(a, b, v, lane) simde_vmlsq_u16((a), (b), simde_vdupq_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsq_laneq_u16 + #define vmlsq_laneq_u16(a, b, v, lane) simde_vmlsq_laneq_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmls_lane_u32(a, b, v, lane) vmls_lane_u32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_lane_u32(a, b, v, lane) simde_vmls_u32((a), (b), simde_vdup_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_lane_u32 + #define vmls_lane_u32(a, b, v, lane) simde_vmls_lane_u32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmls_laneq_u32(a, b, v, lane) vmls_laneq_u32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmls_laneq_u32(a, b, v, lane) simde_vmls_u32((a), 
(b), simde_vdup_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmls_laneq_u32 + #define vmls_laneq_u32(a, b, v, lane) simde_vmls_laneq_u32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsq_laneq_u32(a, b, v, lane) vmlsq_laneq_u32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_laneq_u32(a, b, v, lane) simde_vmlsq_u32((a), (b), simde_vdupq_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsq_laneq_u32 + #define vmlsq_laneq_u32(a, b, v, lane) simde_vmlsq_laneq_u32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsq_lane_f32(a, b, v, lane) vmlsq_lane_f32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_lane_f32(a, b, v, lane) simde_vmlsq_f32((a), (b), simde_vdupq_lane_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_lane_f32 + #define vmlsq_lane_f32(a, b, v, lane) simde_vmlsq_lane_f32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsq_lane_s16(a, b, v, lane) vmlsq_lane_s16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_lane_s16(a, b, v, lane) simde_vmlsq_s16((a), (b), simde_vdupq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_lane_s16 + #define vmlsq_lane_s16(a, b, v, lane) simde_vmlsq_lane_s16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsq_lane_s32(a, b, v, lane) vmlsq_lane_s32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_lane_s32(a, b, v, lane) simde_vmlsq_s32((a), (b), simde_vdupq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_lane_s32 + #define vmlsq_lane_s32(a, b, v, lane) simde_vmlsq_lane_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsq_lane_u16(a, 
b, v, lane) vmlsq_lane_u16((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_lane_u16(a, b, v, lane) simde_vmlsq_u16((a), (b), simde_vdupq_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_lane_u16 + #define vmlsq_lane_u16(a, b, v, lane) simde_vmlsq_lane_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsq_lane_u32(a, b, v, lane) vmlsq_lane_u32((a), (b), (v), (lane)) +#else /* portable fallback */ + #define simde_vmlsq_lane_u32(a, b, v, lane) simde_vmlsq_u32((a), (b), simde_vdupq_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_lane_u32 + #define vmlsq_lane_u32(a, b, v, lane) simde_vmlsq_lane_u32((a), (b), (v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLS_LANE_H) */ diff --git a/simde/arm/neon/mlsl_high_lane.h b/simde/arm/neon/mlsl_high_lane.h new file mode 100644 index 000000000..f45b7d989 --- /dev/null +++ b/simde/arm/neon/mlsl_high_lane.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_LANE_H)
+#define SIMDE_ARM_NEON_MLSL_HIGH_LANE_H /* vmlsl_high_lane{q}_{s16,s32,u16,u32}: lane-selecting wrappers around simde_vmlsl_high_* */
+
+#include "movl_high.h"
+#include "mlsl_high.h"
+#include "dup_n.h"
+#include "mls.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vmlsl_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlsl_high_s16(a, b, simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_lane_s16(a, b, v, lane) vmlsl_high_lane_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_lane_s16
+  #define vmlsl_high_lane_s16(a, b, v, lane) simde_vmlsl_high_lane_s16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  return simde_vmlsl_high_s16(a, b, simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_laneq_s16(a, b, v, lane) vmlsl_high_laneq_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_laneq_s16
+  #define vmlsl_high_laneq_s16(a, b, v, lane) simde_vmlsl_high_laneq_s16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  return simde_vmlsl_high_s32(a, b, simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_lane_s32(a, b, v, lane) vmlsl_high_lane_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_lane_s32
+  #define vmlsl_high_lane_s32(a, b, v, lane) simde_vmlsl_high_lane_s32((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vmlsl_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlsl_high_s32(a, b, simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_laneq_s32(a, b, v, lane) vmlsl_high_laneq_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_laneq_s32
+  #define vmlsl_high_laneq_s32(a, b, v, lane) simde_vmlsl_high_laneq_s32((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4_t
+simde_vmlsl_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlsl_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_lane_u16(a, b, v, lane) vmlsl_high_lane_u16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_lane_u16
+  #define vmlsl_high_lane_u16(a, b, v, lane) simde_vmlsl_high_lane_u16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4_t
+simde_vmlsl_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  return simde_vmlsl_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x8_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_laneq_u16(a, b, v, lane) vmlsl_high_laneq_u16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_laneq_u16
+  #define vmlsl_high_laneq_u16(a, b, v, lane) simde_vmlsl_high_laneq_u16((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2_t
+simde_vmlsl_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  return simde_vmlsl_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x2_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_lane_u32(a, b, v, lane) vmlsl_high_lane_u32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_lane_u32
+  #define vmlsl_high_lane_u32(a, b, v, lane) simde_vmlsl_high_lane_u32((a), (b), (v), (lane))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2_t
+simde_vmlsl_high_laneq_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  return simde_vmlsl_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x4_to_private(v).values[lane])); /* broadcast v[lane], then defer to the non-lane form */
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* native A64: the macro below shadows the portable function */
+  #define simde_vmlsl_high_laneq_u32(a, b, v, lane) vmlsl_high_laneq_u32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vmlsl_high_laneq_u32
+  #define vmlsl_high_laneq_u32(a, b, v, lane) simde_vmlsl_high_laneq_u32((a), (b), (v), (lane))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_LANE_H) */
diff --git a/simde/arm/neon/mul_lane.h b/simde/arm/neon/mul_lane.h
index c4b18df34..1ac2e9420 100644
--- a/simde/arm/neon/mul_lane.h
+++ b/simde/arm/neon/mul_lane.h
@@ -35,6 +35,25 @@ HEDLEY_DIAGNOSTIC_PUSH
 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vmulh_lane_f16(simde_float16_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vmulh_f16(a, simde_float16x4_to_private(b).values[lane]); /* scalar a times element `lane` of b */ +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) /* clang failing the 11.0.0 version check: call the intrinsic with the vector-conversion diagnostic disabled */ + #define simde_vmulh_lane_f16(a, b, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmulh_lane_f16(a, b, lane)) + #else + #define simde_vmulh_lane_f16(a, b, lane) vmulh_lane_f16((a), (b), (lane)) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulh_lane_f16 + #define vmulh_lane_f16(a, b, lane) simde_vmulh_lane_f16(a, b, lane) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vmuld_lane_f64(simde_float64_t a, simde_float64x1_t b, const int lane) @@ -92,6 +111,25 @@ simde_vmuls_lane_f32(simde_float32_t a, simde_float32x2_t b, const int lane) #define vmuls_lane_f32(a, b, lane) simde_vmuls_lane_f32(a, b, lane) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vmulh_laneq_f16(simde_float16_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vmulh_f16(a, simde_float16x8_to_private(b).values[lane]); /* scalar a times element `lane` of b */ +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) /* clang failing the 11.0.0 version check: call the intrinsic with the vector-conversion diagnostic disabled */ + #define simde_vmulh_laneq_f16(a, b, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmulh_laneq_f16(a, b, lane)) + #else + #define simde_vmulh_laneq_f16(a, b, lane) vmulh_laneq_f16((a), (b), (lane)) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulh_laneq_f16 + #define vmulh_laneq_f16(a, b, lane) simde_vmulh_laneq_f16(a, b, lane) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vmuls_laneq_f32(simde_float32_t 
a, simde_float32x4_t b, const int lane) @@ -111,6 +149,30 @@ simde_vmuls_laneq_f32(simde_float32_t a, simde_float32x4_t b, const int lane) #define vmuls_laneq_f32(a, b, lane) simde_vmuls_laneq_f32(a, b, lane) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vmul_lane_f16(simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); /* every element of a is multiplied by the same element b[lane] */ + } + + return simde_float16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vmul_lane_f16(a, b, lane) vmul_lane_f16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_f16 + #define vmul_lane_f16(a, b, lane) simde_vmul_lane_f16((a), (b), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane) @@ -371,6 +433,9 @@ simde_vmulq_lane_f16(simde_float16x8_t a, simde_float16x4_t b, const int lane) return simde_float16x8_from_private(r_); } +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vmulq_lane_f16(a, b, lane) vmulq_lane_f16((a), (b), (lane)) +#endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_f16 #define vmulq_lane_f16(a, b, lane) simde_vmulq_lane_f16((a), (b), (lane)) @@ -520,6 +585,30 @@ simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane) #define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vmulq_laneq_f16(simde_float16x8_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_float16x8_private + r_, + a_ = 
simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); /* every element of a is multiplied by the same element b[lane] */ + } + + return simde_float16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vmulq_laneq_f16(a, b, lane) vmulq_laneq_f16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_f16 + #define vmulq_laneq_f16(a, b, lane) simde_vmulq_laneq_f16((a), (b), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane) @@ -664,6 +753,30 @@ simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane) #define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vmul_laneq_f16(simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a); + simde_float16x8_private b_ = simde_float16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); /* every element of a is multiplied by the same element b[lane] */ + } + + return simde_float16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vmul_laneq_f16(a, b, lane) vmul_laneq_f16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_f16 + #define vmul_laneq_f16(a, b, lane) simde_vmul_laneq_f16((a), (b), (lane)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmul_laneq_f32(simde_float32x2_t a, simde_float32x4_t b, const int lane) diff --git a/simde/arm/neon/neg.h b/simde/arm/neon/neg.h index 779238950..e6b2a8e48 100644 --- 
a/simde/arm/neon/neg.h +++ b/simde/arm/neon/neg.h @@ -22,6 +22,7 @@ * * Copyright: * 2020 Evan Nemerson + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_NEG_H) @@ -47,6 +48,43 @@ simde_vnegd_s64(int64_t a) { #define vnegd_s64(a) simde_vnegd_s64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vnegh_f16(simde_float16_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vnegh_f16(a); + #else + return simde_float16_from_float32(-simde_float16_to_float32(a)); /* negate via a round trip through float32 */ + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vnegh_f16 + #define vnegh_f16(a) simde_vnegh_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vneg_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vneg_f16(a); + #else + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vnegh_f16(a_.values[i]); /* element-wise: reuse the scalar negate */ + } + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vneg_f16 + #define vneg_f16(a) simde_vneg_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vneg_f32(simde_float32x2_t a) { @@ -209,6 +247,29 @@ simde_vneg_s64(simde_int64x1_t a) { #define vneg_s64(a) simde_vneg_s64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vnegq_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vnegq_f16(a); + #else + simde_float16x8_private + r_, + a_ = simde_float16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vnegh_f16(a_.values[i]); /* element-wise: reuse the scalar negate */ + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + 
#undef vnegq_f16 + #define vnegq_f16(a) simde_vnegq_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vnegq_f32(simde_float32x4_t a) { diff --git a/simde/arm/neon/qdmlal.h b/simde/arm/neon/qdmlal.h new file mode 100644 index 000000000..b23ab6fca --- /dev/null +++ b/simde/arm/neon/qdmlal.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLAL_H)
+#define SIMDE_ARM_NEON_QDMLAL_H
+
+#include "add.h"
+#include "mul.h"
+#include "mul_n.h"
+#include "movl.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Scalar signed saturating doubling multiply-accumulate long (16 -> 32 bit).
+ *
+ * Returns sat32(a + sat32(2 * b * c)): the doubled product is clamped to the
+ * int32_t range first and the accumulation is clamped again. The portable
+ * path does all intermediate arithmetic in int64_t so nothing can overflow. */
+SIMDE_FUNCTION_ATTRIBUTES
+int32_t
+simde_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlalh_s16(a, b, c);
+  #else
+    int64_t doubled = HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2;
+    int64_t sum;
+
+    /* Only b == c == INT16_MIN doubles to 2^31, one past INT32_MAX. */
+    if (doubled > INT32_MAX) {
+      doubled = INT32_MAX;
+    }
+
+    sum = HEDLEY_STATIC_CAST(int64_t, a) + doubled;
+    if (sum > INT32_MAX) {
+      return INT32_MAX;
+    }
+    if (sum < INT32_MIN) {
+      return INT32_MIN;
+    }
+    return HEDLEY_STATIC_CAST(int32_t, sum);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlalh_s16
+  #define vqdmlalh_s16(a, b, c) simde_vqdmlalh_s16((a), (b), (c))
+#endif
+
+/* Scalar signed saturating doubling multiply-accumulate long (32 -> 64 bit).
+ *
+ * Returns sat64(a + sat64(2 * b * c)). No wider integer type is used, so the
+ * portable path detects each overflow case before performing the operation. */
+SIMDE_FUNCTION_ATTRIBUTES
+int64_t
+simde_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlals_s32(a, b, c);
+  #else
+    int64_t doubled;
+
+    /* 2 * b * c leaves the int64_t range only for b == c == INT32_MIN, where
+     * it equals 2^63; every other doubled product fits. */
+    if ((b == INT32_MIN) && (c == INT32_MIN)) {
+      doubled = INT64_MAX;
+    } else {
+      doubled = HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2;
+    }
+
+    /* Saturating add: compare against the remaining headroom instead of
+     * adding first, because signed overflow is undefined behaviour. */
+    if ((doubled > 0) && (a > (INT64_MAX - doubled))) {
+      return INT64_MAX;
+    }
+    if ((doubled < 0) && (a < (INT64_MIN - doubled))) {
+      return INT64_MIN;
+    }
+    return a + doubled;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlals_s32
+  #define vqdmlals_s32(a, b, c) simde_vqdmlals_s32((a), (b), (c))
+#endif
+
+/* Vector form: r[i] = simde_vqdmlalh_s16(a[i], b[i], c[i]) for every element,
+ * so each element saturates independently. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vqdmlal_s16(a, b, c);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a);
+    simde_int16x4_private
+      b_ = simde_int16x4_to_private(b),
+      c_ = simde_int16x4_to_private(c);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = simde_vqdmlalh_s16(a_.values[i], b_.values[i], c_.values[i]);
+    }
+
+    return simde_int32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_s16
+  #define vqdmlal_s16(a, b, c) simde_vqdmlal_s16((a), (b), (c))
+#endif
+
+/* Vector form: r[i] = simde_vqdmlals_s32(a[i], b[i], c[i]) for every element,
+ * so each element saturates independently. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vqdmlal_s32(a, b, c);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a);
+    simde_int32x2_private
+      b_ = simde_int32x2_to_private(b),
+      c_ = simde_int32x2_to_private(c);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = simde_vqdmlals_s32(a_.values[i], b_.values[i], c_.values[i]);
+    }
+
+    return simde_int64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_s32
+  #define vqdmlal_s32(a, b, c) simde_vqdmlal_s32((a), (b), (c))
+#endif
+
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_H) */
diff --git a/simde/arm/neon/qdmlal_high.h b/simde/arm/neon/qdmlal_high.h
new file mode 100644
index 000000000..016deb011
--- /dev/null
+++ b/simde/arm/neon/qdmlal_high.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_H)
+#define SIMDE_ARM_NEON_QDMLAL_HIGH_H
+
+#include "movl_high.h"
+#include "mla.h"
+#include "mul_n.h"
+#include "qdmlal.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Doubling multiply-accumulate long on the upper halves of `b` and `c`.
+ *
+ * The portable path computes r[i] = simde_vqdmlalh_s16(a[i], b[i + half],
+ * c[i + half]); the per-element arithmetic is whatever that scalar helper
+ * from qdmlal.h implements, so it is defined in one place only. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlal_high_s16(a, b, c);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a);
+    simde_int16x8_private
+      b_ = simde_int16x8_to_private(b),
+      c_ = simde_int16x8_to_private(c);
+    /* r_ holds half as many elements as b_/c_, so `half` is also the index
+     * of the first upper-half element of the sources. */
+    const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < half ; i++) {
+      r_.values[i] = simde_vqdmlalh_s16(a_.values[i], b_.values[i + half], c_.values[i + half]);
+    }
+
+    return simde_int32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_s16
+  #define vqdmlal_high_s16(a, b, c) simde_vqdmlal_high_s16((a), (b), (c))
+#endif
+
+/* 32 -> 64 bit variant: r[i] = simde_vqdmlals_s32(a[i], b[i + half],
+ * c[i + half]) on the portable path. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlal_high_s32(a, b, c);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a);
+    simde_int32x4_private
+      b_ = simde_int32x4_to_private(b),
+      c_ = simde_int32x4_to_private(c);
+    const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < half ; i++) {
+      r_.values[i] = simde_vqdmlals_s32(a_.values[i], b_.values[i + half], c_.values[i + half]);
+    }
+
+    return simde_int64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_s32
+  #define vqdmlal_high_s32(a, b, c) simde_vqdmlal_high_s32((a), (b), (c))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_H) */
diff --git a/simde/arm/neon/qdmlal_high_lane.h b/simde/arm/neon/qdmlal_high_lane.h
new file mode 100644
index 000000000..b2d6a8b42
--- /dev/null
+++ b/simde/arm/neon/qdmlal_high_lane.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to
any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_LANE_H)
+#define SIMDE_ARM_NEON_QDMLAL_HIGH_LANE_H
+
+#include "movl_high.h"
+#include "add.h"
+#include "mul.h"
+#include "mul_n.h"
+#include "dup_n.h"
+#include "mla.h"
+#include "qdmlal.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* All four functions below compute, on the portable path,
+ *   r[i] = scalar_qdmlal(a[i], b[i + half], v[lane])
+ * where `half` is the element count of the result (and thus the index of the
+ * first upper-half element of `b`) and scalar_qdmlal is simde_vqdmlalh_s16()
+ * or simde_vqdmlals_s32() from qdmlal.h. The selected element of `v` is read
+ * once instead of being splatted into a vector and widened again. */
+
+/* `lane` must be a constant in 0..3. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int32x4_private
+    r_,
+    a_ = simde_int32x4_to_private(a);
+  simde_int16x8_private b_ = simde_int16x8_to_private(b);
+  const int16_t multiplier = simde_int16x4_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlalh_s16(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int32x4_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlal_high_lane_s16(a, b, v, lane) vqdmlal_high_lane_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_lane_s16
+  #define vqdmlal_high_lane_s16(a, b, v, lane) simde_vqdmlal_high_lane_s16((a), (b), (v), (lane))
+#endif
+
+/* `lane` must be a constant in 0..7. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_int32x4_private
+    r_,
+    a_ = simde_int32x4_to_private(a);
+  simde_int16x8_private b_ = simde_int16x8_to_private(b);
+  const int16_t multiplier = simde_int16x8_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlalh_s16(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int32x4_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlal_high_laneq_s16(a, b, v, lane) vqdmlal_high_laneq_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_laneq_s16
+  #define vqdmlal_high_laneq_s16(a, b, v, lane) simde_vqdmlal_high_laneq_s16((a), (b), (v), (lane))
+#endif
+
+/* `lane` must be a constant in 0..1. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_int64x2_private
+    r_,
+    a_ = simde_int64x2_to_private(a);
+  simde_int32x4_private b_ = simde_int32x4_to_private(b);
+  const int32_t multiplier = simde_int32x2_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlals_s32(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int64x2_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlal_high_lane_s32(a, b, v, lane) vqdmlal_high_lane_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_lane_s32
+  #define vqdmlal_high_lane_s32(a, b, v, lane) simde_vqdmlal_high_lane_s32((a), (b), (v), (lane))
+#endif
+
+/* `lane` must be a constant in 0..3. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int64x2_private
+    r_,
+    a_ = simde_int64x2_to_private(a);
+  simde_int32x4_private b_ = simde_int32x4_to_private(b);
+  const int32_t multiplier = simde_int32x4_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlals_s32(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int64x2_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlal_high_laneq_s32(a, b, v, lane) vqdmlal_high_laneq_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_laneq_s32
+  #define vqdmlal_high_laneq_s32(a, b, v, lane) simde_vqdmlal_high_laneq_s32((a), (b), (v), (lane))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_LANE_H) */
diff --git a/simde/arm/neon/qdmlal_high_n.h b/simde/arm/neon/qdmlal_high_n.h
new file mode 100644
index 000000000..205cafbcc
--- /dev/null
+++ 
b/simde/arm/neon/qdmlal_high_n.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_N_H)
+#define SIMDE_ARM_NEON_QDMLAL_HIGH_N_H
+
+#include "movl_high.h"
+#include "dup_n.h"
+#include "add.h"
+#include "mul.h"
+#include "mul_n.h"
+#include "qdmlal.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Doubling multiply-accumulate long of the upper half of `b` by the scalar
+ * `c`. The portable path computes r[i] = simde_vqdmlalh_s16(a[i],
+ * b[i + half], c), leaving the per-element arithmetic to the scalar helper
+ * from qdmlal.h instead of repeating it here. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlal_high_n_s16(a, b, c);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a);
+    simde_int16x8_private b_ = simde_int16x8_to_private(b);
+    /* r_ holds half as many elements as b_, so `half` is also the index of
+     * the first upper-half element of b_. */
+    const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < half ; i++) {
+      r_.values[i] = simde_vqdmlalh_s16(a_.values[i], b_.values[i + half], c);
+    }
+
+    return simde_int32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_n_s16
+  #define vqdmlal_high_n_s16(a, b, c) simde_vqdmlal_high_n_s16((a), (b), (c))
+#endif
+
+/* 32 -> 64 bit variant: r[i] = simde_vqdmlals_s32(a[i], b[i + half], c) on
+ * the portable path. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlal_high_n_s32(a, b, c);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a);
+    simde_int32x4_private b_ = simde_int32x4_to_private(b);
+    const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < half ; i++) {
+      r_.values[i] = simde_vqdmlals_s32(a_.values[i], b_.values[i + half], c);
+    }
+
+    return simde_int64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_high_n_s32
+  #define vqdmlal_high_n_s32(a, b, c) simde_vqdmlal_high_n_s32((a), (b), (c))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_HIGH_N_H) */
diff --git a/simde/arm/neon/qdmlal_lane.h b/simde/arm/neon/qdmlal_lane.h
new file mode 100644
index 000000000..14a663cd6
--- /dev/null
+++ b/simde/arm/neon/qdmlal_lane.h
@@ -0,0 +1,122 @@
+/* 
SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLAL_LANE_H)
+#define SIMDE_ARM_NEON_QDMLAL_LANE_H
+
+#include "qdmlal.h"
+#include "dup_lane.h"
+#include "get_lane.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Every wrapper in this file is a function-like macro with two expansions:
+ * the native intrinsic of the same name when the matching *_NATIVE macro is
+ * defined, otherwise a call into qdmlal.h with element `lane` of `v` selected
+ * by a dup_lane/get_lane helper. Each is followed by an optional alias that
+ * maps the unprefixed NEON name onto the simde_ one; the alias spells its
+ * third parameter `c`, which is the same vector operand called `v` above.
+ * NOTE(review): presumably these stay macros so that `lane` remains a
+ * constant expression for the native intrinsics -- confirm. */
+
+/* Vector form: simde_vqdmlal_s16 with `v[lane]` duplicated by simde_vdup_lane_s16. */
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vqdmlal_lane_s16(a, b, v, lane) vqdmlal_lane_s16((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlal_lane_s16(a, b, v, lane) simde_vqdmlal_s16((a), (b), simde_vdup_lane_s16((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_lane_s16
+  #define vqdmlal_lane_s16(a, b, c, lane) simde_vqdmlal_lane_s16((a), (b), (c), (lane))
+#endif
+
+/* Vector form: simde_vqdmlal_s32 with `v[lane]` duplicated by simde_vdup_lane_s32. */
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vqdmlal_lane_s32(a, b, v, lane) vqdmlal_lane_s32((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlal_lane_s32(a, b, v, lane) simde_vqdmlal_s32((a), (b), simde_vdup_lane_s32((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_lane_s32
+  #define vqdmlal_lane_s32(a, b, c, lane) simde_vqdmlal_lane_s32((a), (b), (c), (lane))
+#endif
+
+/* As simde_vqdmlal_lane_s16, but the element is selected with simde_vdup_laneq_s16. */
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlal_laneq_s16(a, b, v, lane) vqdmlal_laneq_s16((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlal_laneq_s16(a, b, v, lane) simde_vqdmlal_s16((a), (b), simde_vdup_laneq_s16((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_laneq_s16
+  #define vqdmlal_laneq_s16(a, b, c, lane) simde_vqdmlal_laneq_s16((a), (b), (c), (lane))
+#endif
+
+/* As simde_vqdmlal_lane_s32, but the element is selected with simde_vdup_laneq_s32. */
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlal_laneq_s32(a, b, v, lane) vqdmlal_laneq_s32((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlal_laneq_s32(a, b, v, lane) simde_vqdmlal_s32((a), (b), simde_vdup_laneq_s32((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_laneq_s32
+  #define vqdmlal_laneq_s32(a, b, c, lane) simde_vqdmlal_laneq_s32((a), (b), (c), (lane))
+#endif
+
+/* Scalar form: simde_vqdmlalh_s16 with the element read by simde_vget_lane_s16. */
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlalh_lane_s16(a, b, v, lane) vqdmlalh_lane_s16((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlalh_lane_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), simde_vget_lane_s16((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlalh_lane_s16
+  #define vqdmlalh_lane_s16(a, b, c, lane) simde_vqdmlalh_lane_s16((a), (b), (c), (lane))
+#endif
+
+/* Scalar form: simde_vqdmlalh_s16 with the element read by simde_vgetq_lane_s16. */
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlalh_laneq_s16(a, b, v, lane) vqdmlalh_laneq_s16((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlalh_laneq_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), simde_vgetq_lane_s16((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlalh_laneq_s16
+  #define vqdmlalh_laneq_s16(a, b, c, lane) simde_vqdmlalh_laneq_s16((a), (b), (c), (lane))
+#endif
+
+/* Scalar form: simde_vqdmlals_s32 with the element read by simde_vget_lane_s32. */
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlals_lane_s32(a, b, v, lane) vqdmlals_lane_s32((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlals_lane_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), simde_vget_lane_s32((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlals_lane_s32
+  #define vqdmlals_lane_s32(a, b, c, lane) simde_vqdmlals_lane_s32((a), (b), (c), (lane))
+#endif
+
+/* Scalar form: simde_vqdmlals_s32 with the element read by simde_vgetq_lane_s32. */
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlals_laneq_s32(a, b, v, lane) vqdmlals_laneq_s32((a), (b), (v), (lane))
+#else
+  #define simde_vqdmlals_laneq_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), simde_vgetq_lane_s32((v), (lane)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlals_laneq_s32
+  #define vqdmlals_laneq_s32(a, b, c, lane) simde_vqdmlals_laneq_s32((a), (b), (c), (lane))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_LANE_H) */
diff --git 
a/simde/arm/neon/qdmlal_n.h b/simde/arm/neon/qdmlal_n.h new file mode 100644 index 000000000..0a5c69ea3 --- /dev/null +++ b/simde/arm/neon/qdmlal_n.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLAL_N_H)
+#define SIMDE_ARM_NEON_QDMLAL_N_H
+
+#include "dup_n.h"
+#include "qdmlal.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Multiply-accumulate `b` by the scalar `c`: the portable path broadcasts `c`
+ * with simde_vdup_n_s16() and hands the result to simde_vqdmlal_s16(). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vqdmlal_n_s16(a, b, c);
+  #else
+    const simde_int16x4_t c_broadcast = simde_vdup_n_s16(c);
+    return simde_vqdmlal_s16(a, b, c_broadcast);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_n_s16
+  #define vqdmlal_n_s16(a, b, c) simde_vqdmlal_n_s16((a), (b), (c))
+#endif
+
+/* 32-bit variant: the portable path broadcasts `c` with simde_vdup_n_s32()
+ * and hands the result to simde_vqdmlal_s32(). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vqdmlal_n_s32(a, b, c);
+  #else
+    const simde_int32x2_t c_broadcast = simde_vdup_n_s32(c);
+    return simde_vqdmlal_s32(a, b, c_broadcast);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlal_n_s32
+  #define vqdmlal_n_s32(a, b, c) simde_vqdmlal_n_s32((a), (b), (c))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLAL_N_H) */
diff --git a/simde/arm/neon/qdmlsl.h b/simde/arm/neon/qdmlsl.h
new file mode 100644
index 000000000..e7770ac61
--- /dev/null
+++ b/simde/arm/neon/qdmlsl.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * 
included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLSL_H)
+#define SIMDE_ARM_NEON_QDMLSL_H
+
+#include "sub.h"
+#include "mul.h"
+#include "mul_n.h"
+#include "movl.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Scalar signed saturating doubling multiply-subtract long (16 -> 32 bit).
+ *
+ * Returns sat32(a - sat32(2 * b * c)): the doubled product is clamped to the
+ * int32_t range first and the subtraction is clamped again. The portable
+ * path does all intermediate arithmetic in int64_t so nothing can overflow. */
+SIMDE_FUNCTION_ATTRIBUTES
+int32_t
+simde_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlslh_s16(a, b, c);
+  #else
+    int64_t doubled = HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2;
+    int64_t difference;
+
+    /* Only b == c == INT16_MIN doubles to 2^31, one past INT32_MAX. */
+    if (doubled > INT32_MAX) {
+      doubled = INT32_MAX;
+    }
+
+    difference = HEDLEY_STATIC_CAST(int64_t, a) - doubled;
+    if (difference > INT32_MAX) {
+      return INT32_MAX;
+    }
+    if (difference < INT32_MIN) {
+      return INT32_MIN;
+    }
+    return HEDLEY_STATIC_CAST(int32_t, difference);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlslh_s16
+  #define vqdmlslh_s16(a, b, c) simde_vqdmlslh_s16((a), (b), (c))
+#endif
+
+/* Scalar signed saturating doubling multiply-subtract long (32 -> 64 bit).
+ *
+ * Returns sat64(a - sat64(2 * b * c)). No wider integer type is used, so the
+ * portable path detects each overflow case before performing the operation. */
+SIMDE_FUNCTION_ATTRIBUTES
+int64_t
+simde_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlsls_s32(a, b, c);
+  #else
+    int64_t doubled;
+
+    /* 2 * b * c leaves the int64_t range only for b == c == INT32_MIN, where
+     * it equals 2^63; every other doubled product fits. */
+    if ((b == INT32_MIN) && (c == INT32_MIN)) {
+      doubled = INT64_MAX;
+    } else {
+      doubled = HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2;
+    }
+
+    /* Saturating subtract: compare against the remaining headroom instead
+     * of subtracting first, because signed overflow is undefined behaviour. */
+    if ((doubled > 0) && (a < (INT64_MIN + doubled))) {
+      return INT64_MIN;
+    }
+    if ((doubled < 0) && (a > (INT64_MAX + doubled))) {
+      return INT64_MAX;
+    }
+    return a - doubled;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsls_s32
+  #define vqdmlsls_s32(a, b, c) simde_vqdmlsls_s32((a), (b), (c))
+#endif
+
+/* Vector form: r[i] = simde_vqdmlslh_s16(a[i], b[i], c[i]) for every element,
+ * so each element saturates independently. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vqdmlsl_s16(a, b, c);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a);
+    simde_int16x4_private
+      b_ = simde_int16x4_to_private(b),
+      c_ = simde_int16x4_to_private(c);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = simde_vqdmlslh_s16(a_.values[i], b_.values[i], c_.values[i]);
+    }
+
+    return simde_int32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_s16
+  #define vqdmlsl_s16(a, b, c) simde_vqdmlsl_s16((a), (b), (c))
+#endif
+
+/* Vector form: r[i] = simde_vqdmlsls_s32(a[i], b[i], c[i]) for every element,
+ * so each element saturates independently. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vqdmlsl_s32(a, b, c);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a);
+    simde_int32x2_private
+      b_ = simde_int32x2_to_private(b),
+      c_ = simde_int32x2_to_private(c);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = simde_vqdmlsls_s32(a_.values[i], b_.values[i], c_.values[i]);
+    }
+
+    return simde_int64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_s32
+  #define vqdmlsl_s32(a, b, c) simde_vqdmlsl_s32((a), (b), (c))
+#endif
+
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_H) */
diff --git a/simde/arm/neon/qdmlsl_high.h b/simde/arm/neon/qdmlsl_high.h
new file mode 100644
index 000000000..18a6f47fe
--- /dev/null
+++ b/simde/arm/neon/qdmlsl_high.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_H)
+#define SIMDE_ARM_NEON_QDMLSL_HIGH_H
+
+#include "movl_high.h"
+#include "sub.h"
+#include "mul.h"
+#include "mul_n.h"
+#include "qdmlsl.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Doubling multiply-subtract long on the upper halves of `b` and `c`.
+ *
+ * The portable path computes r[i] = simde_vqdmlslh_s16(a[i], b[i + half],
+ * c[i + half]); the per-element arithmetic is whatever that scalar helper
+ * from qdmlsl.h implements, so it is defined in one place only. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlsl_high_s16(a, b, c);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a);
+    simde_int16x8_private
+      b_ = simde_int16x8_to_private(b),
+      c_ = simde_int16x8_to_private(c);
+    /* r_ holds half as many elements as b_/c_, so `half` is also the index
+     * of the first upper-half element of the sources. */
+    const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < half ; i++) {
+      r_.values[i] = simde_vqdmlslh_s16(a_.values[i], b_.values[i + half], c_.values[i + half]);
+    }
+
+    return simde_int32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_high_s16
+  #define vqdmlsl_high_s16(a, b, c) simde_vqdmlsl_high_s16((a), (b), (c))
+#endif
+
+/* 32 -> 64 bit variant: r[i] = simde_vqdmlsls_s32(a[i], b[i + half],
+ * c[i + half]) on the portable path. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return vqdmlsl_high_s32(a, b, c);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a);
+    simde_int32x4_private
+      b_ = simde_int32x4_to_private(b),
+      c_ = simde_int32x4_to_private(c);
+    const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < half ; i++) {
+      r_.values[i] = simde_vqdmlsls_s32(a_.values[i], b_.values[i + half], c_.values[i + half]);
+    }
+
+    return simde_int64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_high_s32
+  #define vqdmlsl_high_s32(a, b, c) simde_vqdmlsl_high_s32((a), (b), (c))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_H) */
diff --git a/simde/arm/neon/qdmlsl_high_lane.h b/simde/arm/neon/qdmlsl_high_lane.h
new file mode 100644
index 000000000..877c72a2a
--- /dev/null
+++ b/simde/arm/neon/qdmlsl_high_lane.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023      Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_LANE_H)
+#define SIMDE_ARM_NEON_QDMLSL_HIGH_LANE_H
+
+#include "movl_high.h"
+#include "sub.h"
+#include "mul.h"
+#include "mul_n.h"
+#include "dup_n.h"
+#include "qdmlsl.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* All four functions below compute, on the portable path,
+ *   r[i] = scalar_qdmlsl(a[i], b[i + half], v[lane])
+ * where `half` is the element count of the result (and thus the index of the
+ * first upper-half element of `b`) and scalar_qdmlsl is simde_vqdmlslh_s16()
+ * or simde_vqdmlsls_s32() from qdmlsl.h. The selected element of `v` is read
+ * once instead of being splatted into a vector and widened again. */
+
+/* `lane` must be a constant in 0..3. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlsl_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int32x4_private
+    r_,
+    a_ = simde_int32x4_to_private(a);
+  simde_int16x8_private b_ = simde_int16x8_to_private(b);
+  const int16_t multiplier = simde_int16x4_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlslh_s16(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int32x4_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlsl_high_lane_s16(a, b, v, lane) vqdmlsl_high_lane_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_high_lane_s16
+  #define vqdmlsl_high_lane_s16(a, b, v, lane) simde_vqdmlsl_high_lane_s16((a), (b), (v), (lane))
+#endif
+
+/* `lane` must be a constant in 0..7. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vqdmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
+  simde_int32x4_private
+    r_,
+    a_ = simde_int32x4_to_private(a);
+  simde_int16x8_private b_ = simde_int16x8_to_private(b);
+  const int16_t multiplier = simde_int16x8_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlslh_s16(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int32x4_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlsl_high_laneq_s16(a, b, v, lane) vqdmlsl_high_laneq_s16((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_high_laneq_s16
+  #define vqdmlsl_high_laneq_s16(a, b, v, lane) simde_vqdmlsl_high_laneq_s16((a), (b), (v), (lane))
+#endif
+
+/* `lane` must be a constant in 0..1. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_int64x2_private
+    r_,
+    a_ = simde_int64x2_to_private(a);
+  simde_int32x4_private b_ = simde_int32x4_to_private(b);
+  const int32_t multiplier = simde_int32x2_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlsls_s32(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int64x2_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlsl_high_lane_s32(a, b, v, lane) vqdmlsl_high_lane_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_high_lane_s32
+  #define vqdmlsl_high_lane_s32(a, b, v, lane) simde_vqdmlsl_high_lane_s32((a), (b), (v), (lane))
+#endif
+
+/* `lane` must be a constant in 0..3. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vqdmlsl_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_int64x2_private
+    r_,
+    a_ = simde_int64x2_to_private(a);
+  simde_int32x4_private b_ = simde_int32x4_to_private(b);
+  const int32_t multiplier = simde_int32x4_to_private(v).values[lane];
+  const size_t half = sizeof(r_.values) / sizeof(r_.values[0]);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < half ; i++) {
+    r_.values[i] = simde_vqdmlsls_s32(a_.values[i], b_.values[i + half], multiplier);
+  }
+
+  return simde_int64x2_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqdmlsl_high_laneq_s32(a, b, v, lane) vqdmlsl_high_laneq_s32((a), (b), (v), (lane))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqdmlsl_high_laneq_s32
+  #define vqdmlsl_high_laneq_s32(a, b, v, lane) simde_vqdmlsl_high_laneq_s32((a), (b), (v), (lane))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_LANE_H) */
diff --git a/simde/arm/neon/qdmlsl_high_n.h b/simde/arm/neon/qdmlsl_high_n.h
new file mode 100644
index 000000000..9db3d7e04
--- /dev/null
+++ b/simde/arm/neon/qdmlsl_high_n.h
@@ 
-0,0 +1,86 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_N_H) +#define SIMDE_ARM_NEON_QDMLSL_HIGH_N_H + +#include "movl_high.h" +#include "dup_n.h" +#include "sub.h" +#include "mul.h" +#include "mul_n.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmlsl_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmlsl_high_n_s16(a, b, c); + #else + return simde_vsubq_s32(a, + simde_vmulq_n_s32( + simde_vmulq_s32( + simde_vmovl_high_s16(b), + simde_vmovl_high_s16(simde_vdupq_n_s16(c))), 2)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_high_n_s16 + #define vqdmlsl_high_n_s16(a, b, c) simde_vqdmlsl_high_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmlsl_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmlsl_high_n_s32(a, b, c); + #else + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_high_s32(b), + simde_vmovl_high_s32(simde_vdupq_n_s32(c)))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_high_n_s32 + #define vqdmlsl_high_n_s32(a, b, c) simde_vqdmlsl_high_n_s32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_N_H) */ diff --git a/simde/arm/neon/qdmlsl_lane.h b/simde/arm/neon/qdmlsl_lane.h new file mode 100644 index 000000000..d93677da0 --- /dev/null +++ b/simde/arm/neon/qdmlsl_lane.h @@ -0,0 +1,122 @@ +/* 
SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMLSL_LANE_H) +#define SIMDE_ARM_NEON_QDMLSL_LANE_H + +#include "qdmlsl.h" +#include "dup_lane.h" +#include "get_lane.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmlsl_lane_s16(a, b, v, lane) vqdmlsl_lane_s16((a), (b), (v), (lane)) +#else + #define simde_vqdmlsl_lane_s16(a, b, v, lane) simde_vqdmlsl_s16((a), (b), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_lane_s16 + #define vqdmlsl_lane_s16(a, b, c, lane) simde_vqdmlsl_lane_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmlsl_lane_s32(a, b, v, lane) vqdmlsl_lane_s32((a), (b), (v), (lane)) +#else + #define simde_vqdmlsl_lane_s32(a, b, v, lane) simde_vqdmlsl_s32((a), (b), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_lane_s32 + #define vqdmlsl_lane_s32(a, b, c, lane) simde_vqdmlsl_lane_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmlsl_laneq_s16(a, b, v, lane) vqdmlsl_laneq_s16((a), (b), (v), (lane)) +#else + #define simde_vqdmlsl_laneq_s16(a, b, v, lane) simde_vqdmlsl_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_laneq_s16 + #define vqdmlsl_laneq_s16(a, b, c, lane) simde_vqdmlsl_laneq_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmlsl_laneq_s32(a, b, v, lane) vqdmlsl_laneq_s32((a), (b), (v), (lane)) +#else + #define simde_vqdmlsl_laneq_s32(a, b, v, lane) simde_vqdmlsl_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_laneq_s32 
+ #define vqdmlsl_laneq_s32(a, b, c, lane) simde_vqdmlsl_laneq_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmlslh_lane_s16(a, b, v, lane) vqdmlslh_lane_s16((a), (b), (v), (lane)) +#else + #define simde_vqdmlslh_lane_s16(a, b, v, lane) simde_vqdmlslh_s16((a), (b), simde_vget_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlslh_lane_s16 + #define vqdmlslh_lane_s16(a, b, c, lane) simde_vqdmlslh_lane_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmlslh_laneq_s16(a, b, v, lane) vqdmlslh_laneq_s16((a), (b), (v), (lane)) +#else + #define simde_vqdmlslh_laneq_s16(a, b, v, lane) simde_vqdmlslh_s16((a), (b), simde_vgetq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlslh_laneq_s16 + #define vqdmlslh_laneq_s16(a, b, c, lane) simde_vqdmlslh_laneq_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmlsls_lane_s32(a, b, v, lane) vqdmlsls_lane_s32((a), (b), (v), (lane)) +#else + #define simde_vqdmlsls_lane_s32(a, b, v, lane) simde_vqdmlsls_s32((a), (b), simde_vget_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlsls_lane_s32 + #define vqdmlsls_lane_s32(a, b, c, lane) simde_vqdmlsls_lane_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmlsls_laneq_s32(a, b, v, lane) vqdmlsls_laneq_s32((a), (b), (v), (lane)) +#else + #define simde_vqdmlsls_laneq_s32(a, b, v, lane) simde_vqdmlsls_s32((a), (b), simde_vgetq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmlsls_laneq_s32 + #define vqdmlsls_laneq_s32(a, b, c, lane) simde_vqdmlsls_laneq_s32((a), (b), (c), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_LANE_H) */ diff --git
a/simde/arm/neon/qdmlsl_n.h b/simde/arm/neon/qdmlsl_n.h new file mode 100644 index 000000000..5707f4c47 --- /dev/null +++ b/simde/arm/neon/qdmlsl_n.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMLSL_N_H) +#define SIMDE_ARM_NEON_QDMLSL_N_H + +#include "dup_n.h" +#include "qdmlsl.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmlsl_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmlsl_n_s16(a, b, c); + #else + return simde_vqdmlsl_s16(a, b, simde_vdup_n_s16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_n_s16 + #define vqdmlsl_n_s16(a, b, c) simde_vqdmlsl_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmlsl_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmlsl_n_s32(a, b, c); + #else + return simde_vqdmlsl_s32(a, b, simde_vdup_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmlsl_n_s32 + #define vqdmlsl_n_s32(a, b, c) simde_vqdmlsl_n_s32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_N_H) */ diff --git a/simde/arm/neon/qdmulh.h b/simde/arm/neon/qdmulh.h index 17fe37b95..29d1078cb 100644 --- a/simde/arm/neon/qdmulh.h +++ b/simde/arm/neon/qdmulh.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_QDMULH_H) @@ -89,6 +90,21 @@ simde_vqdmulh_s16(simde_int16x4_t a, simde_int16x4_t b) { #define vqdmulh_s16(a, b) simde_vqdmulh_s16((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqdmulhh_s16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmulhh_s16(a, b); + #else + int32_t tmp = simde_vqdmullh_s16(a, b); + return HEDLEY_STATIC_CAST(int16_t, tmp >> 
16); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhh_s16 + #define vqdmulhh_s16(a, b) simde_vqdmulhh_s16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqdmulh_s32(simde_int32x2_t a, simde_int32x2_t b) { diff --git a/simde/arm/neon/qdmulh_lane.h b/simde/arm/neon/qdmulh_lane.h index 3120eb7ad..32cd22dea 100644 --- a/simde/arm/neon/qdmulh_lane.h +++ b/simde/arm/neon/qdmulh_lane.h @@ -23,6 +23,7 @@ * Copyright: * 2021 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_QDMULH_LANE_H) @@ -37,6 +38,17 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulhh_lane_s16(a, v, lane) vqdmulhh_lane_s16((a), (v), (lane)) +#else + #define simde_vqdmulhh_lane_s16(a, v, lane) \ + simde_vqdmulhh_s16((a), simde_vget_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhh_lane_s16 + #define vqdmulhh_lane_s16(a, v, lane) simde_vqdmulhh_lane_s16((a), (v), (lane)) +#endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vqdmulh_lane_s16(a, v, lane) vqdmulh_lane_s16((a), (v), (lane)) #else @@ -81,6 +93,17 @@ SIMDE_BEGIN_DECLS_ #define vqdmulhq_lane_s32(a, v, lane) simde_vqdmulhq_lane_s32((a), (v), (lane)) #endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulhh_laneq_s16(a, v, lane) vqdmulhh_laneq_s16((a), (v), (lane)) +#else + #define simde_vqdmulhh_laneq_s16(a, v, lane) \ + simde_vqdmulhh_s16((a), simde_vgetq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhh_laneq_s16 + #define vqdmulhh_laneq_s16(a, v, lane) simde_vqdmulhh_laneq_s16((a), (v), (lane)) +#endif + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmulh_laneq_s16(a, v, lane) vqdmulh_laneq_s16((a), (v), (lane)) #else diff --git 
a/simde/arm/neon/qdmull.h b/simde/arm/neon/qdmull.h index 88bf50bcb..871257f61 100644 --- a/simde/arm/neon/qdmull.h +++ b/simde/arm/neon/qdmull.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ /* Implementation notes (seanptmaher): @@ -67,8 +68,8 @@ simde_vqdmulls_s32(int32_t a, int32_t b) { #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vqdmulls_s16 - #define vqdmulls_s16(a, b) simde_vqdmulls_s16((a), (b)) + #undef vqdmulls_s32 + #define vqdmulls_s32(a, b) simde_vqdmulls_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES diff --git a/simde/arm/neon/qdmull_high.h b/simde/arm/neon/qdmull_high.h new file mode 100644 index 000000000..2c6b26912 --- /dev/null +++ b/simde/arm/neon/qdmull_high.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULL_HIGH_H) +#define SIMDE_ARM_NEON_QDMULL_HIGH_H + +#include "combine.h" +#include "get_high.h" +#include "qdmull.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_high_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmull_high_s16(a, b); + #else + return simde_vqdmull_s16(simde_vget_high_s16(a), simde_vget_high_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_s16 + #define vqdmull_high_s16(a, b) simde_vqdmull_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_high_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmull_high_s32(a, b); + #else + return simde_vqdmull_s32(simde_vget_high_s32(a), simde_vget_high_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_s32 + #define vqdmull_high_s32(a, b) simde_vqdmull_high_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULL_HIGH_H) */ diff --git a/simde/arm/neon/qdmull_high_lane.h b/simde/arm/neon/qdmull_high_lane.h new file mode 100644 index 000000000..f8326b2bf --- /dev/null +++ b/simde/arm/neon/qdmull_high_lane.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * 
+ * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULL_HIGH_LANE_H) +#define SIMDE_ARM_NEON_QDMULL_HIGH_LANE_H + +#include "combine.h" +#include "qdmull.h" +#include "dup_n.h" +#include "get_high.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_high_lane_s16(simde_int16x8_t a, simde_int16x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4_private + v_ = simde_int16x4_to_private(v); + return simde_vqdmull_s16(simde_vget_high_s16(a), simde_vdup_n_s16(v_.values[lane])); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmull_high_lane_s16(a, v, lane) vqdmull_high_lane_s16(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_lane_s16 + #define vqdmull_high_lane_s16(a, v, lane) simde_vqdmull_high_lane_s16((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_high_laneq_s16(simde_int16x8_t a, simde_int16x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8_private + v_ = simde_int16x8_to_private(v); + return simde_vqdmull_s16(simde_vget_high_s16(a), simde_vdup_n_s16(v_.values[lane])); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + 
#define simde_vqdmull_high_laneq_s16(a, v, lane) vqdmull_high_laneq_s16(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_laneq_s16 + #define vqdmull_high_laneq_s16(a, v, lane) simde_vqdmull_high_laneq_s16((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_high_lane_s32(simde_int32x4_t a, simde_int32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_private + v_ = simde_int32x2_to_private(v); + return simde_vqdmull_s32(simde_vget_high_s32(a), simde_vdup_n_s32(v_.values[lane])); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmull_high_lane_s32(a, v, lane) vqdmull_high_lane_s32(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_lane_s32 + #define vqdmull_high_lane_s32(a, v, lane) simde_vqdmull_high_lane_s32((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_high_laneq_s32(simde_int32x4_t a, simde_int32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_private + v_ = simde_int32x4_to_private(v); + return simde_vqdmull_s32(simde_vget_high_s32(a), simde_vdup_n_s32(v_.values[lane])); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmull_high_laneq_s32(a, v, lane) vqdmull_high_laneq_s32(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_laneq_s32 + #define vqdmull_high_laneq_s32(a, v, lane) simde_vqdmull_high_laneq_s32((a), (v), (lane)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULL_HIGH_LANE_H) */ diff --git a/simde/arm/neon/qdmull_high_n.h b/simde/arm/neon/qdmull_high_n.h new file mode 100644 index 000000000..aef31240f --- /dev/null +++ b/simde/arm/neon/qdmull_high_n.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of 
this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULL_HIGH_N_H) +#define SIMDE_ARM_NEON_QDMULL_HIGH_N_H + +#include "combine.h" +#include "get_high.h" +#include "dup_n.h" +#include "qdmull.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_high_n_s16(simde_int16x8_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmull_high_n_s16(a, b); + #else + return simde_vqdmull_s16(simde_vget_high_s16(a), simde_vdup_n_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_n_s16 + #define vqdmull_high_n_s16(a, b) simde_vqdmull_high_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_high_n_s32(simde_int32x4_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmull_high_n_s32(a, b); + #else + return simde_vqdmull_s32(simde_vget_high_s32(a), simde_vdup_n_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_high_n_s32 + #define vqdmull_high_n_s32(a, b) simde_vqdmull_high_n_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULL_HIGH_N_H) */ diff --git a/simde/arm/neon/qdmull_lane.h b/simde/arm/neon/qdmull_lane.h new file mode 100644 index 000000000..a7bf68cbd --- /dev/null +++ b/simde/arm/neon/qdmull_lane.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULL_LANE_H) +#define SIMDE_ARM_NEON_QDMULL_LANE_H + +#include "combine.h" +#include "qdmull.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqdmullh_lane_s16(int16_t a, simde_int16x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4_private + v_ = simde_int16x4_to_private(v); + + return simde_vqdmullh_s16(a, v_.values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmullh_lane_s16(a, v, lane) vqdmullh_lane_s16(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmullh_lane_s16 + #define vqdmullh_lane_s16(a, v, lane) simde_vqdmullh_lane_s16((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqdmullh_laneq_s16(int16_t a, simde_int16x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8_private + v_ = simde_int16x8_to_private(v); + + return simde_vqdmullh_s16(a, v_.values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmullh_laneq_s16(a, v, lane) vqdmullh_laneq_s16(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmullh_laneq_s16 + 
#define vqdmullh_laneq_s16(a, v, lane) simde_vqdmullh_laneq_s16((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqdmulls_lane_s32(int32_t a, simde_int32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_private + v_ = simde_int32x2_to_private(v); + + return simde_vqdmulls_s32(a, v_.values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulls_lane_s32(a, v, lane) vqdmulls_lane_s32(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulls_lane_s32 + #define vqdmulls_lane_s32(a, v, lane) simde_vqdmulls_lane_s32((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqdmulls_laneq_s32(int32_t a, simde_int32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_private + v_ = simde_int32x4_to_private(v); + + return simde_vqdmulls_s32(a, v_.values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulls_laneq_s32(a, v, lane) vqdmulls_laneq_s32(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulls_laneq_s32 + #define vqdmulls_laneq_s32(a, v, lane) simde_vqdmulls_laneq_s32((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_private r_; + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmullh_s16(a_.values[i], b_.values[lane]); + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmull_lane_s16(a, v, lane) vqdmull_lane_s16(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmull_lane_s16 + #define vqdmull_lane_s16(a, v, lane) 
simde_vqdmull_lane_s16((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_laneq_s16(simde_int16x4_t a, simde_int16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int32x4_private r_; + simde_int16x4_private + a_ = simde_int16x4_to_private(a); + simde_int16x8_private + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmullh_s16(a_.values[i], b_.values[lane]); + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmull_laneq_s16(a, v, lane) vqdmull_laneq_s16(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_laneq_s16 + #define vqdmull_laneq_s16(a, v, lane) simde_vqdmull_laneq_s16((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int64x2_private r_; + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmulls_s32(a_.values[i], b_.values[lane]); + } + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmull_lane_s32(a, v, lane) vqdmull_lane_s32(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmull_lane_s32 + #define vqdmull_lane_s32(a, v, lane) simde_vqdmull_lane_s32((a), (v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_laneq_s32(simde_int32x2_t a, simde_int32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int64x2_private r_; + simde_int32x2_private + a_ = simde_int32x2_to_private(a); + simde_int32x4_private + b_ = 
simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmulls_s32(a_.values[i], b_.values[lane]); + } + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmull_laneq_s32(a, v, lane) vqdmull_laneq_s32(a, v, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmull_laneq_s32 + #define vqdmull_laneq_s32(a, v, lane) simde_vqdmull_laneq_s32((a), (v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULL_LANE_H) */ diff --git a/simde/arm/neon/qdmull_n.h b/simde/arm/neon/qdmull_n.h new file mode 100644 index 000000000..691802637 --- /dev/null +++ b/simde/arm/neon/qdmull_n.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULL_N_H) +#define SIMDE_ARM_NEON_QDMULL_N_H + +#include "combine.h" +#include "dup_n.h" +#include "qdmull.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_n_s16(simde_int16x4_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmull_n_s16(a, b); + #else + return simde_vqdmull_s16(a, simde_vdup_n_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmull_n_s16 + #define vqdmull_n_s16(a, b) simde_vqdmull_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqdmull_n_s32(simde_int32x2_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmull_n_s32(a, b); + #else + return simde_vqdmull_s32(a, simde_vdup_n_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmull_n_s32 + #define vqdmull_n_s32(a, b) simde_vqdmull_n_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULL_N_H) */ diff --git a/simde/arm/neon/qshl.h b/simde/arm/neon/qshl.h index 279afe708..4d3e9dbf9 100644 --- a/simde/arm/neon/qshl.h +++ b/simde/arm/neon/qshl.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_QSHL_H) @@ -330,7 +331,7 @@ simde_vqshld_u64(uint64_t a, int64_t b) { #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vqshldb_u64 + #undef vqshld_u64 #define vqshld_u64(a, b) simde_vqshld_u64((a), (b)) #endif diff --git a/simde/arm/neon/reinterpret.h b/simde/arm/neon/reinterpret.h index 990938eb8..d7efddfbb 100644 --- a/simde/arm/neon/reinterpret.h +++ b/simde/arm/neon/reinterpret.h @@ -2331,6 +2331,23 @@ simde_vreinterpret_u64_u32(simde_uint32x2_t a) { #define 
vreinterpret_u64_u32 simde_vreinterpret_u64_u32 #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vreinterpret_u64_f16(a); + #else + simde_uint64x1_private r_; + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_f16 + #define vreinterpret_u64_f16 simde_vreinterpret_u64_f16 +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_f32(simde_float32x2_t a) { diff --git a/simde/arm/neon/sqrt.h b/simde/arm/neon/sqrt.h index fb0b5e6d6..c4864f505 100644 --- a/simde/arm/neon/sqrt.h +++ b/simde/arm/neon/sqrt.h @@ -37,7 +37,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16 simde_vsqrth_f16(simde_float16 a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vsqrth_f16(a, b); + return vsqrth_f16(a); #elif defined(simde_math_sqrtf) simde_float32 af = simde_float16_to_float32(a); return simde_float16_from_float32(simde_math_sqrtf(af)); diff --git a/simde/arm/neon/types.h b/simde/arm/neon/types.h index dd3b738c6..623300635 100644 --- a/simde/arm/neon/types.h +++ b/simde/arm/neon/types.h @@ -403,9 +403,13 @@ typedef union { #if defined(SIMDE_ARM_NEON_FP16) typedef float16_t simde_float16_t; typedef float16x4_t simde_float16x4_t; - typedef float16x8_t simde_float16x8_t; typedef float16x4x2_t simde_float16x4x2_t; + typedef float16x4x3_t simde_float16x4x3_t; + typedef float16x4x4_t simde_float16x4x4_t; + typedef float16x8_t simde_float16x8_t; typedef float16x8x2_t simde_float16x8x2_t; + typedef float16x8x3_t simde_float16x8x3_t; + typedef float16x8x4_t simde_float16x8x4_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 #endif @@ -567,9 +571,21 @@ typedef union { typedef struct simde_float16x4x2_t { simde_float16x4_t 
val[2]; } simde_float16x4x2_t; + typedef struct simde_float16x4x3_t { + simde_float16x4_t val[3]; + } simde_float16x4x3_t; + typedef struct simde_float16x4x4_t { + simde_float16x4_t val[4]; + } simde_float16x4x4_t; typedef struct simde_float16x8x2_t { simde_float16x8_t val[2]; } simde_float16x8x2_t; + typedef struct simde_float16x8x3_t { + simde_float16x8_t val[3]; + } simde_float16x8x3_t; + typedef struct simde_float16x8x4_t { + simde_float16x8_t val[4]; + } simde_float16x8x4_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 #endif @@ -659,9 +675,21 @@ typedef union { typedef struct simde_float16x4x2_t { simde_float16x4_t val[2]; } simde_float16x4x2_t; + typedef struct simde_float16x4x3_t { + simde_float16x4_t val[3]; + } simde_float16x4x3_t; + typedef struct simde_float16x4x4_t { + simde_float16x4_t val[4]; + } simde_float16x4x4_t; typedef struct simde_float16x8x2_t { simde_float16x8_t val[2]; } simde_float16x8x2_t; + typedef struct simde_float16x8x3_t { + simde_float16x8_t val[3]; + } simde_float16x8x3_t; + typedef struct simde_float16x8x4_t { + simde_float16x8_t val[4]; + } simde_float16x8x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32) typedef simde_float32 simde_float32_t; diff --git a/test/arm/neon/abal.c b/test/arm/neon/abal.c new file mode 100644 index 000000000..7a0774cc2 --- /dev/null +++ b/test/arm/neon/abal.c @@ -0,0 +1,392 @@ +#define SIMDE_TEST_ARM_NEON_INSN abal + +#include "test-neon.h" +#include "../../../simde/arm/neon/abal.h" + +/* https://community.intel.com/t5/Intel-C-Compiler/ICC-generates-incorrect-code/td-p/1199261 */ +#if defined(HEDLEY_INTEL_VERSION) +# define TEST_SIMDE_VABD_NO_TEST_32 +#endif + +static int +test_simde_vabal_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int8_t b[8]; + int8_t c[8]; + int16_t r[8]; + } test_vec[] = { + { { -INT16_C(19874), -INT16_C(18252), INT16_C(2041), -INT16_C(14241), -INT16_C(30231), -INT16_C(13137), INT16_C(25586), -INT16_C(11143) }, + { INT8_C(56), INT8_C(55), 
-INT8_C(8), -INT8_C(66), -INT8_C(33), -INT8_C(97), -INT8_C(53), INT8_C(22) }, + { -INT8_C(18), INT8_C(11), INT8_C(45), -INT8_C(104), -INT8_C(124), INT8_C(12), INT8_C(47), -INT8_C(13) }, + { -INT16_C(19800), -INT16_C(18208), INT16_C(2094), -INT16_C(14203), -INT16_C(30140), -INT16_C(13028), INT16_C(25686), -INT16_C(11108) } }, + { { INT16_C(12066), INT16_C(17209), INT16_C(3127), INT16_C(28251), INT16_C(25395), -INT16_C(19511), INT16_C(16348), INT16_C(22812) }, + { -INT8_C(52), INT8_C(16), -INT8_C(7), INT8_C(8), -INT8_C(78), INT8_C(69), INT8_C(85), -INT8_C(11) }, + { INT8_C(19), -INT8_C(46), INT8_C(36), INT8_C(57), -INT8_C(58), -INT8_C(80), -INT8_C(33), -INT8_C(107) }, + { INT16_C(12137), INT16_C(17271), INT16_C(3170), INT16_C(28300), INT16_C(25415), -INT16_C(19362), INT16_C(16466), INT16_C(22908) } }, + { { -INT16_C(8280), INT16_C(25886), -INT16_C(25146), -INT16_C(15756), -INT16_C(1936), INT16_C(13944), -INT16_C(17993), -INT16_C(1374) }, + { -INT8_C(110), -INT8_C(69), -INT8_C(96), -INT8_C(10), INT8_C(103), -INT8_C(123), INT8_C(9), INT8_C(76) }, + { INT8_C(90), -INT8_C(2), -INT8_C(57), INT8_C(19), -INT8_C(106), -INT8_C(2), INT8_C(112), -INT8_C(61) }, + { -INT16_C(8080), INT16_C(25953), -INT16_C(25107), -INT16_C(15727), -INT16_C(1727), INT16_C(14065), -INT16_C(17890), -INT16_C(1237) } }, + { { -INT16_C(22536), -INT16_C(27437), INT16_C(12572), -INT16_C(8347), INT16_C(3077), -INT16_C(11369), INT16_C(17487), -INT16_C(10110) }, + { -INT8_C(127), INT8_C(18), INT8_C(0), INT8_C(65), -INT8_C(69), INT8_C(22), -INT8_C(76), INT8_C(61) }, + { -INT8_C(92), -INT8_C(67), -INT8_C(119), -INT8_C(118), INT8_C(125), -INT8_C(78), -INT8_C(66), INT8_C(105) }, + { -INT16_C(22501), -INT16_C(27352), INT16_C(12691), -INT16_C(8164), INT16_C(3271), -INT16_C(11269), INT16_C(17497), -INT16_C(10066) } }, + { { -INT16_C(30027), INT16_C(12670), INT16_C(8563), INT16_C(32381), -INT16_C(9412), INT16_C(16896), -INT16_C(12916), INT16_C(13214) }, + { INT8_C(62), INT8_C(59), -INT8_C(80), -INT8_C(71), 
INT8_C(23), INT8_C(20), -INT8_C(49), -INT8_C(9) }, + { INT8_C(89), -INT8_C(102), -INT8_C(63), INT8_C(9), INT8_C(38), -INT8_C(116), INT8_C(7), INT8_C(28) }, + { -INT16_C(30000), INT16_C(12831), INT16_C(8580), INT16_C(32461), -INT16_C(9397), INT16_C(17032), -INT16_C(12860), INT16_C(13251) } }, + { { INT16_C(23148), INT16_C(12510), -INT16_C(9344), INT16_C(9847), INT16_C(5308), INT16_C(16725), INT16_C(25730), -INT16_C(3498) }, + { INT8_C(19), -INT8_C(122), INT8_C(25), INT8_C(42), -INT8_C(70), -INT8_C(61), -INT8_C(91), -INT8_C(101) }, + { INT8_C(59), -INT8_C(111), INT8_C(36), INT8_C(1), INT8_C(70), INT8_C(77), INT8_C(51), INT8_C(31) }, + { INT16_C(23188), INT16_C(12521), -INT16_C(9333), INT16_C(9888), INT16_C(5448), INT16_C(16863), INT16_C(25872), -INT16_C(3366) } }, + { { INT16_C(688), -INT16_C(20307), -INT16_C(6465), -INT16_C(14204), -INT16_C(18860), -INT16_C(19839), INT16_C(11212), -INT16_C(9807) }, + { -INT8_C(94), INT8_C(1), -INT8_C(38), INT8_C(49), -INT8_C(107), INT8_C(57), INT8_C(28), INT8_C(17) }, + { -INT8_C(7), -INT8_C(32), INT8_C(99), INT8_C(54), INT8_C(32), INT8_C(122), -INT8_C(33), -INT8_C(108) }, + { INT16_C(775), -INT16_C(20274), -INT16_C(6328), -INT16_C(14199), -INT16_C(18721), -INT16_C(19774), INT16_C(11273), -INT16_C(9682) } }, + { { INT16_C(27280), -INT16_C(30706), INT16_C(20885), -INT16_C(11166), INT16_C(25965), INT16_C(12772), -INT16_C(23708), INT16_C(4767) }, + { INT8_C(101), INT8_C(87), -INT8_C(57), -INT8_C(17), INT8_C(109), INT8_C(49), INT8_C(52), INT8_C(15) }, + { INT8_C(10), -INT8_C(101), -INT8_C(10), INT8_C(37), -INT8_C(38), INT8_C(37), INT8_C(29), INT8_C(64) }, + { INT16_C(27371), -INT16_C(30518), INT16_C(20932), -INT16_C(11112), INT16_C(26112), INT16_C(12784), -INT16_C(23685), INT16_C(4816) } }, + { { INT16_C(23152), INT16_C(6589), -INT16_C(15056), INT16_C(20297), -INT16_C(19327), INT16_C(31683), INT16_C(16682), -INT16_C(5678) }, + { -INT8_C(11), INT8_C(65), INT8_C(24), INT8_C(107), -INT8_C(80), INT8_C(24), -INT8_C(126), -INT8_C(8) }, + { 
-INT8_C(56), -INT8_C(110), INT8_C(53), INT8_C(29), -INT8_C(60), INT8_C(121), INT8_C(106), -INT8_C(23) }, + { INT16_C(23197), INT16_C(6764), -INT16_C(15027), INT16_C(20375), -INT16_C(19307), INT16_C(31780), INT16_C(16914), -INT16_C(5663) } }, + { { INT16_C(10294), -INT16_C(5817), INT16_C(9100), INT16_C(15766), -INT16_C(19123), -INT16_C(22417), -INT16_C(23716), INT16_C(27840) }, + { INT8_C(69), -INT8_C(96), -INT8_C(8), -INT8_C(46), INT8_C(7), INT8_C(29), -INT8_C(27), -INT8_C(53) }, + { INT8_C(7), -INT8_C(102), -INT8_C(104), -INT8_C(70), INT8_C(113), INT8_C(44), INT8_C(29), INT8_C(30) }, + { INT16_C(10356), -INT16_C(5811), INT16_C(9196), INT16_C(15790), -INT16_C(19017), -INT16_C(22402), -INT16_C(23660), INT16_C(27923) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); + simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); + simde_int16x8_t r = simde_vabal_s8(a, b, c); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} +static int +test_simde_vabal_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t c[4]; + int32_t r[4]; + } test_vec[] = { + { { INT32_C(18753146), -INT32_C(2003866889), -INT32_C(864249014), INT32_C(197306281) }, + { -INT16_C(3591), -INT16_C(31911), INT16_C(33), INT16_C(6612) }, + { INT16_C(18697), -INT16_C(23075), -INT16_C(2929), INT16_C(28306) }, + { INT32_C(18775434), -INT32_C(2003858053), -INT32_C(864246052), INT32_C(197327975) } }, + { { -INT32_C(1391144799), INT32_C(742148836), -INT32_C(551021601), -INT32_C(902833315) }, + { INT16_C(29949), -INT16_C(3952), -INT16_C(28825), INT16_C(26672) }, + { -INT16_C(23531), INT16_C(14887), -INT16_C(30022), INT16_C(30444) }, + { -INT32_C(1391091319), INT32_C(742167675), -INT32_C(551020404), -INT32_C(902829543) } }, + { { INT32_C(1063611766), INT32_C(441926320), INT32_C(1046962260), 
INT32_C(1303927204) }, + { INT16_C(10348), -INT16_C(6439), -INT16_C(20970), INT16_C(19505) }, + { INT16_C(4716), INT16_C(11699), INT16_C(12771), -INT16_C(11744) }, + { INT32_C(1063617398), INT32_C(441944458), INT32_C(1046996001), INT32_C(1303958453) } }, + { { -INT32_C(2143257679), INT32_C(992130135), -INT32_C(564197936), INT32_C(914997405) }, + { INT16_C(12394), -INT16_C(32081), -INT16_C(11761), INT16_C(27297) }, + { -INT16_C(1537), INT16_C(20301), INT16_C(19378), INT16_C(24912) }, + { -INT32_C(2143243748), INT32_C(992182517), -INT32_C(564166797), INT32_C(914999790) } }, + { { INT32_C(1220074826), -INT32_C(1351776494), INT32_C(358849784), -INT32_C(1057794144) }, + { INT16_C(31143), -INT16_C(13582), INT16_C(6266), INT16_C(28831) }, + { INT16_C(22705), -INT16_C(22522), INT16_C(23284), INT16_C(418) }, + { INT32_C(1220083264), -INT32_C(1351767554), INT32_C(358866802), -INT32_C(1057765731) } }, + { { -INT32_C(26502870), INT32_C(1549156435), -INT32_C(1940248725), -INT32_C(1018531254) }, + { -INT16_C(22625), -INT16_C(12680), -INT16_C(32610), -INT16_C(20348) }, + { INT16_C(26441), -INT16_C(4578), INT16_C(3566), -INT16_C(2362) }, + { -INT32_C(26453804), INT32_C(1549164537), -INT32_C(1940212549), -INT32_C(1018513268) } }, + { { INT32_C(294877544), INT32_C(393934723), INT32_C(2073567736), INT32_C(2036271058) }, + { -INT16_C(31824), -INT16_C(18526), INT16_C(6501), INT16_C(21343) }, + { INT16_C(17911), INT16_C(11406), INT16_C(12141), INT16_C(12247) }, + { INT32_C(294927279), INT32_C(393964655), INT32_C(2073573376), INT32_C(2036280154) } }, + { { -INT32_C(1298333974), INT32_C(2033301145), -INT32_C(1539477987), -INT32_C(806242335) }, + { -INT16_C(26396), -INT16_C(25530), INT16_C(25085), INT16_C(11920) }, + { -INT16_C(8236), -INT16_C(23934), INT16_C(2332), -INT16_C(5922) }, + { -INT32_C(1298315814), INT32_C(2033302741), -INT32_C(1539455234), -INT32_C(806224493) } }, + { { -INT32_C(408846521), -INT32_C(37450838), -INT32_C(549924034), -INT32_C(1806095098) }, + { INT16_C(27730), 
-INT16_C(4310), -INT16_C(7974), INT16_C(29047) }, + { INT16_C(13583), -INT16_C(31096), INT16_C(24025), -INT16_C(28181) }, + { -INT32_C(408832374), -INT32_C(37424052), -INT32_C(549892035), -INT32_C(1806037870) } }, + { { -INT32_C(1813757812), -INT32_C(1491300200), -INT32_C(1350343975), -INT32_C(370312500) }, + { INT16_C(27802), -INT16_C(5314), -INT16_C(26863), INT16_C(2157) }, + { INT16_C(32142), -INT16_C(1643), -INT16_C(11173), -INT16_C(19741) }, + { -INT32_C(1813753472), -INT32_C(1491296529), -INT32_C(1350328285), -INT32_C(370290602) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); + simde_int32x4_t r = simde_vabal_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t c[2]; + int64_t r[2]; + } test_vec[] = { + { { INT64_C(6463005714558930663), -INT64_C(4139195320040793897) }, + { -INT32_C(98450158), INT32_C(1997215022) }, + { -INT32_C(225523203), -INT32_C(2057155643) }, + { INT64_C(6463005714686003708), -INT64_C(4139195315986423232) } }, + { { -INT64_C(5490267348749435227), -INT64_C(6047969120088065828) }, + { -INT32_C(632228442), INT32_C(723279817) }, + { -INT32_C(1854146244), INT32_C(131895829) }, + { -INT64_C(5490267347527517425), -INT64_C(6047969119496681840) } }, + { { INT64_C(5773633360489854601), -INT64_C(435070372727985286) }, + { -INT32_C(1405355634), INT32_C(197572179) }, + { INT32_C(1628245736), -INT32_C(100234158) }, + { INT64_C(5773633363523455971), -INT64_C(435070372430178949) } }, + { { INT64_C(6732792342444114720), -INT64_C(7148721497261387475) }, + { INT32_C(1275651916), INT32_C(1691872612) }, + { -INT32_C(318194444), -INT32_C(111152145) }, + {
INT64_C(6732792344037961080), -INT64_C(7148721495458362718) } }, + { { -INT64_C(6633264670701156405), -INT64_C(317972272775361037) }, + { -INT32_C(1217485004), -INT32_C(1237626879) }, + { -INT32_C(1807669685), -INT32_C(957420452) }, + { -INT64_C(6633264670110971724), -INT64_C(317972272495154610) } }, + { { -INT64_C(1024963991674529220), INT64_C(8920714413382130185) }, + { INT32_C(734861933), -INT32_C(2036203191) }, + { INT32_C(1377486606), -INT32_C(2046018133) }, + { -INT64_C(1024963991031904547), INT64_C(8920714413391945127) } }, + { { -INT64_C(1523246057777963681), -INT64_C(684630109812294627) }, + { -INT32_C(988974737), INT32_C(2018160116) }, + { INT32_C(1290966707), -INT32_C(122305159) }, + { -INT64_C(1523246055498022237), -INT64_C(684630107671829352) } }, + { { -INT64_C(6520241241552503575), -INT64_C(2594711252795020607) }, + { -INT32_C(1529814204), INT32_C(2018427746) }, + { -INT32_C(375365442), -INT32_C(383088817) }, + { -INT64_C(6520241240398054813), -INT64_C(2594711250393504044) } }, + { { -INT64_C(4774129355393256012), INT64_C(2532618448657103471) }, + { INT32_C(2021007980), INT32_C(2144332610) }, + { INT32_C(1131868980), -INT32_C(2137550438) }, + { -INT64_C(4774129354504117012), INT64_C(2532618452938986519) } }, + { { -INT64_C(7043902067628191278), INT64_C(6145117476365756021) }, + { -INT32_C(656770654), INT32_C(846877590) }, + { -INT32_C(1349891154), INT32_C(513524804) }, + { -INT64_C(7043902066935070778), INT64_C(6145117476699108807) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); + simde_int64x2_t r = simde_vabal_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint8_t b[8]; + uint8_t
c[8]; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C(10664), UINT16_C(44299), UINT16_C(38551), UINT16_C(60736), UINT16_C(8737), UINT16_C(35643), UINT16_C(18837), UINT16_C(7314) }, + { UINT8_C(135), UINT8_C(81), UINT8_C(246), UINT8_C(248), UINT8_C(90), UINT8_C(28), UINT8_C(55), UINT8_C(56) }, + { UINT8_C(213), UINT8_C(126), UINT8_C(65), UINT8_C(254), UINT8_C(93), UINT8_C(67), UINT8_C(230), UINT8_C(161) }, + { UINT16_C(10742), UINT16_C(44344), UINT16_C(38732), UINT16_C(60742), UINT16_C(8740), UINT16_C(35682), UINT16_C(19012), UINT16_C(7419) } }, + { { UINT16_C(21227), UINT16_C(59118), UINT16_C(5195), UINT16_C(38718), UINT16_C(55755), UINT16_C(41743), UINT16_C(5137), UINT16_C(60549) }, + { UINT8_C(39), UINT8_C(249), UINT8_C(204), UINT8_C(211), UINT8_C(75), UINT8_C(97), UINT8_C(65), UINT8_C(220) }, + { UINT8_C(192), UINT8_C(69), UINT8_C(200), UINT8_C(5), UINT8_C(70), UINT8_C(252), UINT8_C(159), UINT8_C(234) }, + { UINT16_C(21380), UINT16_C(59298), UINT16_C(5199), UINT16_C(38924), UINT16_C(55760), UINT16_C(41898), UINT16_C(5231), UINT16_C(60563) } }, + { { UINT16_C(49871), UINT16_C(50502), UINT16_C(4125), UINT16_C(28816), UINT16_C(63819), UINT16_C(14824), UINT16_C(10093), UINT16_C(58028) }, + { UINT8_C(65), UINT8_C(39), UINT8_C(223), UINT8_C(116), UINT8_C(85), UINT8_C(130), UINT8_C(126), UINT8_C(69) }, + { UINT8_C(134), UINT8_C(83), UINT8_C(45), UINT8_C(56), UINT8_C(89), UINT8_C(92), UINT8_C(105), UINT8_C(130) }, + { UINT16_C(49940), UINT16_C(50546), UINT16_C(4303), UINT16_C(28876), UINT16_C(63823), UINT16_C(14862), UINT16_C(10114), UINT16_C(58089) } }, + { { UINT16_C(44994), UINT16_C(36737), UINT16_C(35703), UINT16_C(49931), UINT16_C(54880), UINT16_C(24804), UINT16_C(60979), UINT16_C(12678) }, + { UINT8_C(60), UINT8_C(15), UINT8_C(6), UINT8_C(124), UINT8_C(223), UINT8_C(51), UINT8_C(224), UINT8_C(97) }, + { UINT8_C(59), UINT8_C(34), UINT8_C(92), UINT8_C(239), UINT8_C(103), UINT8_C(237), UINT8_C(161), UINT8_C(166) }, + { UINT16_C(44995), UINT16_C(36756), 
UINT16_C(35789), UINT16_C(50046), UINT16_C(55000), UINT16_C(24990), UINT16_C(61042), UINT16_C(12747) } }, + { { UINT16_C(47317), UINT16_C(36310), UINT16_C(62522), UINT16_C(33644), UINT16_C(50960), UINT16_C(30812), UINT16_C(31661), UINT16_C(11315) }, + { UINT8_C(58), UINT8_C(55), UINT8_C(114), UINT8_C(185), UINT8_C(194), UINT8_C(232), UINT8_C(232), UINT8_C(227) }, + { UINT8_C(87), UINT8_C(146), UINT8_C(233), UINT8_C(130), UINT8_C(193), UINT8_C(8), UINT8_C(47), UINT8_C(110) }, + { UINT16_C(47346), UINT16_C(36401), UINT16_C(62641), UINT16_C(33699), UINT16_C(50961), UINT16_C(31036), UINT16_C(31846), UINT16_C(11432) } }, + { { UINT16_C(54579), UINT16_C(43004), UINT16_C(21198), UINT16_C(851), UINT16_C(57477), UINT16_C(53281), UINT16_C(16237), UINT16_C(58376) }, + { UINT8_C(172), UINT8_C(116), UINT8_C(39), UINT8_C(251), UINT8_C(252), UINT8_C(159), UINT8_C(23), UINT8_C(47) }, + { UINT8_C(127), UINT8_C(45), UINT8_C(73), UINT8_C(173), UINT8_C(18), UINT8_C(211), UINT8_C(136), UINT8_C(220) }, + { UINT16_C(54624), UINT16_C(43075), UINT16_C(21232), UINT16_C(929), UINT16_C(57711), UINT16_C(53333), UINT16_C(16350), UINT16_C(58549) } }, + { { UINT16_C(25053), UINT16_C(34450), UINT16_C(60480), UINT16_C(35881), UINT16_C(33907), UINT16_C(62349), UINT16_C(58100), UINT16_C(6780) }, + { UINT8_C(210), UINT8_C(129), UINT8_C(167), UINT8_C(245), UINT8_C(228), UINT8_C(9), UINT8_C(181), UINT8_C(224) }, + { UINT8_C(110), UINT8_C(244), UINT8_C(216), UINT8_C(104), UINT8_C(142), UINT8_C(179), UINT8_C(10), UINT8_C(109) }, + { UINT16_C(25153), UINT16_C(34565), UINT16_C(60529), UINT16_C(36022), UINT16_C(33993), UINT16_C(62519), UINT16_C(58271), UINT16_C(6895) } }, + { { UINT16_C(40961), UINT16_C(17123), UINT16_C(34095), UINT16_C(2257), UINT16_C(63653), UINT16_C(21178), UINT16_C(64379), UINT16_C(1932) }, + { UINT8_C(18), UINT8_C(20), UINT8_C(44), UINT8_C(253), UINT8_C(36), UINT8_C(119), UINT8_C(60), UINT8_C(245) }, + { UINT8_C(155), UINT8_C(76), UINT8_C(6), UINT8_C(198), UINT8_C(3), UINT8_C(72), 
UINT8_C(174), UINT8_C(59) }, + { UINT16_C(41098), UINT16_C(17179), UINT16_C(34133), UINT16_C(2312), UINT16_C(63686), UINT16_C(21225), UINT16_C(64493), UINT16_C(2118) } }, + { { UINT16_C(22723), UINT16_C(1871), UINT16_C(24774), UINT16_C(28140), UINT16_C(7814), UINT16_C(37660), UINT16_C(46351), UINT16_C(34464) }, + { UINT8_C(23), UINT8_C(5), UINT8_C(191), UINT8_C(140), UINT8_C(70), UINT8_C(151), UINT8_C(171), UINT8_C(111) }, + { UINT8_C(16), UINT8_C(188), UINT8_C(232), UINT8_C(241), UINT8_C(125), UINT8_C(63), UINT8_C(45), UINT8_C(44) }, + { UINT16_C(22730), UINT16_C(2054), UINT16_C(24815), UINT16_C(28241), UINT16_C(7869), UINT16_C(37748), UINT16_C(46477), UINT16_C(34531) } }, + { { UINT16_C(61467), UINT16_C(22307), UINT16_C(43431), UINT16_C(6460), UINT16_C(60025), UINT16_C(283), UINT16_C(57097), UINT16_C(47111) }, + { UINT8_C(93), UINT8_C(41), UINT8_C(121), UINT8_C(132), UINT8_C(17), UINT8_C(218), UINT8_C(232), UINT8_C(137) }, + { UINT8_C(67), UINT8_C(243), UINT8_C(167), UINT8_C(162), UINT8_C(0), UINT8_C(109), UINT8_C(186), UINT8_C(166) }, + { UINT16_C(61493), UINT16_C(22509), UINT16_C(43477), UINT16_C(6490), UINT16_C(60042), UINT16_C(392), UINT16_C(57143), UINT16_C(47140) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); + simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); + simde_uint16x8_t r = simde_vabal_u8(a, b, c); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t b[4]; + uint16_t c[4]; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C(278395659), UINT32_C(1941798426), UINT32_C(4255697765), UINT32_C(152378613) }, + { UINT16_C(49641), UINT16_C(56497), UINT16_C(50497), UINT16_C(19047) }, + { UINT16_C(17963), UINT16_C(59142), UINT16_C(45415), UINT16_C(19420) }, 
+ { UINT32_C(278427337), UINT32_C(1941801071), UINT32_C(4255702847), UINT32_C(152378986) } }, + { { UINT32_C(191395037), UINT32_C(2467241647), UINT32_C(348327967), UINT32_C(476956400) }, + { UINT16_C(9597), UINT16_C(36527), UINT16_C(39881), UINT16_C(38505) }, + { UINT16_C(37038), UINT16_C(46655), UINT16_C(32505), UINT16_C(60457) }, + { UINT32_C(191422478), UINT32_C(2467251775), UINT32_C(348335343), UINT32_C(476978352) } }, + { { UINT32_C(1964325848), UINT32_C(1604915231), UINT32_C(3007027398), UINT32_C(2332792010) }, + { UINT16_C(14280), UINT16_C(58599), UINT16_C(61512), UINT16_C(50992) }, + { UINT16_C(11364), UINT16_C(60872), UINT16_C(22758), UINT16_C(7476) }, + { UINT32_C(1964328764), UINT32_C(1604917504), UINT32_C(3007066152), UINT32_C(2332835526) } }, + { { UINT32_C(2263110539), UINT32_C(3056970276), UINT32_C(3871287446), UINT32_C(194424760) }, + { UINT16_C(41574), UINT16_C(21677), UINT16_C(31521), UINT16_C(62703) }, + { UINT16_C(40149), UINT16_C(56480), UINT16_C(34262), UINT16_C(23099) }, + { UINT32_C(2263111964), UINT32_C(3057005079), UINT32_C(3871290187), UINT32_C(194464364) } }, + { { UINT32_C(3351588884), UINT32_C(1336622885), UINT32_C(2607124568), UINT32_C(1120676337) }, + { UINT16_C(12982), UINT16_C(8142), UINT16_C(50577), UINT16_C(58333) }, + { UINT16_C(1510), UINT16_C(53160), UINT16_C(55474), UINT16_C(45308) }, + { UINT32_C(3351600356), UINT32_C(1336667903), UINT32_C(2607129465), UINT32_C(1120689362) } }, + { { UINT32_C(105325235), UINT32_C(2043500245), UINT32_C(660931095), UINT32_C(2087782248) }, + { UINT16_C(23050), UINT16_C(2095), UINT16_C(30017), UINT16_C(25573) }, + { UINT16_C(33778), UINT16_C(42100), UINT16_C(31297), UINT16_C(16723) }, + { UINT32_C(105335963), UINT32_C(2043540250), UINT32_C(660932375), UINT32_C(2087791098) } }, + { { UINT32_C(3666448686), UINT32_C(1737518001), UINT32_C(2117166043), UINT32_C(2831434639) }, + { UINT16_C(15849), UINT16_C(42751), UINT16_C(52907), UINT16_C(60502) }, + { UINT16_C(47667), UINT16_C(19672), 
UINT16_C(55976), UINT16_C(43385) }, + { UINT32_C(3666480504), UINT32_C(1737541080), UINT32_C(2117169112), UINT32_C(2831451756) } }, + { { UINT32_C(1945633868), UINT32_C(2322589051), UINT32_C(3344526250), UINT32_C(3462182855) }, + { UINT16_C(31637), UINT16_C(65145), UINT16_C(39492), UINT16_C(47509) }, + { UINT16_C(61957), UINT16_C(16067), UINT16_C(53834), UINT16_C(8288) }, + { UINT32_C(1945664188), UINT32_C(2322638129), UINT32_C(3344540592), UINT32_C(3462222076) } }, + { { UINT32_C(2740215773), UINT32_C(1595605772), UINT32_C(1329232400), UINT32_C(1788669758) }, + { UINT16_C(62367), UINT16_C(11982), UINT16_C(59626), UINT16_C(39780) }, + { UINT16_C(45250), UINT16_C(16979), UINT16_C(63502), UINT16_C(4905) }, + { UINT32_C(2740232890), UINT32_C(1595610769), UINT32_C(1329236276), UINT32_C(1788704633) } }, + { { UINT32_C(323341655), UINT32_C(721780532), UINT32_C(553402809), UINT32_C(755600971) }, + { UINT16_C(43978), UINT16_C(48707), UINT16_C(50941), UINT16_C(15969) }, + { UINT16_C(49159), UINT16_C(6730), UINT16_C(27676), UINT16_C(42935) }, + { UINT32_C(323346836), UINT32_C(721822509), UINT32_C(553426074), UINT32_C(755627937) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); + simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); + simde_uint32x4_t r = simde_vabal_u16(a, b, c); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t b[2]; + uint32_t c[2]; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C(4980484137400382401), UINT64_C(2266324244599854515) }, + { UINT32_C(1379797764), UINT32_C(1241159716) }, + { UINT32_C(2689520430), UINT32_C(2368067983) }, + { UINT64_C(4980484138710105067), UINT64_C(2266324245726762782) } }, + { { UINT64_C(9009389137652207759), 
UINT64_C(1405495345318952109) }, + { UINT32_C(2165735875), UINT32_C(2844963172) }, + { UINT32_C(3450596080), UINT32_C(1410269298) }, + { UINT64_C(9009389138937067964), UINT64_C(1405495346753645983) } }, + { { UINT64_C(7946017522271812169), UINT64_C(5954939470930688449) }, + { UINT32_C(3977955336), UINT32_C(406581886) }, + { UINT32_C(1202180194), UINT32_C(901550917) }, + { UINT64_C(7946017525047587311), UINT64_C(5954939471425657480) } }, + { { UINT64_C(2075730948399881393), UINT64_C(6233083028381432090) }, + { UINT32_C(523472130), UINT32_C(3276876363) }, + { UINT32_C(575692398), UINT32_C(4111243132) }, + { UINT64_C(2075730948452101661), UINT64_C(6233083029215798859) } }, + { { UINT64_C(2039151327536943025), UINT64_C(675987598434364761) }, + { UINT32_C(1678761089), UINT32_C(1601877560) }, + { UINT32_C(952491186), UINT32_C(4222480985) }, + { UINT64_C(2039151328263212928), UINT64_C(675987601054968186) } }, + { { UINT64_C(4375299356301599279), UINT64_C(3073299433347657432) }, + { UINT32_C(2307356223), UINT32_C(1411192556) }, + { UINT32_C(3885690105), UINT32_C(1683393261) }, + { UINT64_C(4375299357879933161), UINT64_C(3073299433619858137) } }, + { { UINT64_C(3125622259840841973), UINT64_C(4157966308493685739) }, + { UINT32_C(607311755), UINT32_C(1883176105) }, + { UINT32_C(4005499408), UINT32_C(2467576142) }, + { UINT64_C(3125622263239029626), UINT64_C(4157966309078085776) } }, + { { UINT64_C(882865717690512135), UINT64_C(8325625789344375595) }, + { UINT32_C(2171229385), UINT32_C(3654125095) }, + { UINT32_C(1809428305), UINT32_C(1627714497) }, + { UINT64_C(882865718052313215), UINT64_C(8325625791370786193) } }, + { { UINT64_C(6491931873381893843), UINT64_C(4899321772001859037) }, + { UINT32_C(2294161292), UINT32_C(1861598823) }, + { UINT32_C(3870176055), UINT32_C(2719169208) }, + { UINT64_C(6491931874957908606), UINT64_C(4899321772859429422) } }, + { { UINT64_C(3868657064471156875), UINT64_C(1209768485240087287) }, + { UINT32_C(4258016166), UINT32_C(673492032) }, + { 
UINT32_C(1831741569), UINT32_C(3551386843) }, + { UINT64_C(3868657066897431472), UINT64_C(1209768488117982098) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); + simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); + simde_uint64x2_t r = simde_vabal_u32(a, b, c); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_u32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/abal_high.c b/test/arm/neon/abal_high.c new file mode 100644 index 000000000..93181fb1c --- /dev/null +++ b/test/arm/neon/abal_high.c @@ -0,0 +1,432 @@ +#define SIMDE_TEST_ARM_NEON_INSN abal_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/abal_high.h" + +/* https://community.intel.com/t5/Intel-C-Compiler/ICC-generates-incorrect-code/td-p/1199261 */ +#if defined(HEDLEY_INTEL_VERSION) +# define TEST_SIMDE_VABD_NO_TEST_32 +#endif + +static int +test_simde_vabal_high_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int8_t b[16]; + int8_t c[16]; + int16_t r[8]; + } test_vec[] = { + { { INT16_C(716), INT16_C(29151), INT16_C(29093), INT16_C(27271), INT16_C(5789), INT16_C(17316), -INT16_C(8236), INT16_C(18621) }, + { -INT8_C(127), INT8_C(5), -INT8_C(115), -INT8_C(102), INT8_C(43), -INT8_C(53), INT8_C(15), -INT8_C(4), + -INT8_C(9), INT8_C(37), -INT8_C(117), INT8_C(76), -INT8_C(22), INT8_C(32), INT8_C(94), INT8_C(78) }, + { -INT8_C(11), INT8_C(96), INT8_C(65), INT8_C(60), -INT8_C(96), INT8_C(35), INT8_C(83), -INT8_C(38), + INT8_C(20), -INT8_C(82), -INT8_C(105), -INT8_C(122), INT8_C(52), 
INT8_C(108), INT8_C(56), INT8_C(97) }, + { INT16_C(745), INT16_C(29270), INT16_C(29105), INT16_C(27469), INT16_C(5863), INT16_C(17392), -INT16_C(8198), INT16_C(18640) } }, + { { -INT16_C(24748), -INT16_C(20075), -INT16_C(16319), INT16_C(28770), -INT16_C(3900), -INT16_C(31101), -INT16_C(6390), INT16_C(14598) }, + { -INT8_C(91), INT8_C(30), -INT8_C(76), -INT8_C(44), INT8_C(43), INT8_C(113), INT8_C(62), INT8_C(52), + INT8_C(51), -INT8_C(97), INT8_C(15), INT8_C(116), -INT8_C(77), INT8_C(76), INT8_C(2), INT8_C(74) }, + { INT8_C(82), INT8_C(83), INT8_C(16), INT8_C(6), -INT8_C(77), -INT8_C(29), -INT8_C(83), INT8_C(53), + -INT8_C(14), -INT8_C(75), -INT8_C(49), INT8_C(90), INT8_C(13), INT8_C(112), INT8_C(58), -INT8_C(95) }, + { -INT16_C(24683), -INT16_C(20053), -INT16_C(16255), INT16_C(28796), -INT16_C(3810), -INT16_C(31065), -INT16_C(6334), INT16_C(14767) } }, + { { -INT16_C(13373), -INT16_C(22548), -INT16_C(22456), -INT16_C(25197), -INT16_C(27484), INT16_C(8486), INT16_C(7004), -INT16_C(832) }, + { INT8_C(59), INT8_C(13), -INT8_C(123), INT8_C(127), -INT8_C(111), INT8_C(122), INT8_C(45), -INT8_C(26), + INT8_C(65), -INT8_C(40), -INT8_C(112), -INT8_C(97), -INT8_C(86), -INT8_C(53), -INT8_C(96), -INT8_C(85) }, + { -INT8_C(72), INT8_C(74), INT8_C(64), INT8_C(127), -INT8_C(109), -INT8_C(75), INT8_C(112), INT8_C(124), + -INT8_C(77), INT8_C(88), -INT8_C(106), INT8_C(5), -INT8_C(23), INT8_C(10), -INT8_C(112), -INT8_C(6) }, + { -INT16_C(13231), -INT16_C(22420), -INT16_C(22450), -INT16_C(25095), -INT16_C(27421), INT16_C(8549), INT16_C(7020), -INT16_C(753) } }, + { { INT16_C(30356), INT16_C(19481), INT16_C(17652), INT16_C(22560), -INT16_C(8359), -INT16_C(5853), INT16_C(17494), INT16_C(26669) }, + { INT8_C(55), -INT8_C(95), -INT8_C(109), -INT8_C(96), INT8_C(50), INT8_C(66), INT8_C(106), INT8_C(37), + INT8_C(66), INT8_C(116), -INT8_C(40), -INT8_C(51), -INT8_C(89), -INT8_C(34), -INT8_C(92), -INT8_C(11) }, + { INT8_C(113), INT8_C(77), -INT8_C(118), -INT8_C(82), -INT8_C(71), -INT8_C(92), 
INT8_C(110), INT8_C(53), + -INT8_C(64), INT8_C(127), -INT8_C(102), -INT8_C(53), -INT8_C(27), -INT8_C(47), -INT8_C(20), INT8_C(108) }, + { INT16_C(30486), INT16_C(19492), INT16_C(17714), INT16_C(22562), -INT16_C(8297), -INT16_C(5840), INT16_C(17566), INT16_C(26788) } }, + { { INT16_C(19788), -INT16_C(17957), -INT16_C(22695), INT16_C(27620), -INT16_C(18937), -INT16_C(15606), -INT16_C(17612), -INT16_C(16042) }, + { -INT8_C(83), INT8_C(86), -INT8_C(41), INT8_C(84), -INT8_C(110), -INT8_C(19), INT8_C(54), INT8_C(80), + INT8_C(15), INT8_C(125), INT8_C(118), -INT8_C(77), -INT8_C(45), INT8_C(60), INT8_C(54), -INT8_C(36) }, + { -INT8_C(110), INT8_C(40), -INT8_C(108), INT8_C(16), INT8_C(111), INT8_C(41), -INT8_C(44), -INT8_C(96), + INT8_C(46), INT8_C(15), -INT8_C(57), -INT8_C(40), -INT8_C(105), -INT8_C(82), INT8_C(22), INT8_C(104) }, + { INT16_C(19819), -INT16_C(17847), -INT16_C(22520), INT16_C(27657), -INT16_C(18877), -INT16_C(15464), -INT16_C(17580), -INT16_C(15902) } }, + { { -INT16_C(13139), -INT16_C(15416), -INT16_C(9595), -INT16_C(25280), INT16_C(28260), INT16_C(24587), INT16_C(22167), -INT16_C(19553) }, + { -INT8_C(78), INT8_C(44), INT8_C(116), INT8_C(81), -INT8_C(30), -INT8_C(102), -INT8_C(28), INT8_C(28), + -INT8_C(10), -INT8_C(126), -INT8_C(110), -INT8_C(64), INT8_C(20), -INT8_C(46), -INT8_C(49), INT8_C(62) }, + { -INT8_C(112), INT8_C(115), INT8_C(12), -INT8_C(49), -INT8_C(92), INT8_C(47), -INT8_C(45), -INT8_C(125), + -INT8_C(40), INT8_C(28), INT8_C(49), -INT8_C(35), -INT8_C(76), -INT8_C(107), INT8_C(66), INT8_C(92) }, + { -INT16_C(13109), -INT16_C(15262), -INT16_C(9436), -INT16_C(25251), INT16_C(28356), INT16_C(24648), INT16_C(22282), -INT16_C(19523) } }, + { { -INT16_C(19282), INT16_C(22460), -INT16_C(27815), INT16_C(29281), INT16_C(22043), -INT16_C(12003), -INT16_C(13572), INT16_C(30020) }, + { INT8_C(70), -INT8_C(4), -INT8_C(42), -INT8_C(37), INT8_C(94), INT8_C(121), INT8_C(25), -INT8_C(72), + INT8_C(61), INT8_C(10), INT8_C(83), INT8_C(52), INT8_C(1), 
INT8_C(34), INT8_C(35), -INT8_C(66) }, + { -INT8_C(67), -INT8_C(78), -INT8_C(34), INT8_C(7), -INT8_C(128), -INT8_C(19), INT8_C(4), INT8_C(78), + -INT8_C(99), -INT8_C(115), INT8_C(94), -INT8_C(64), -INT8_C(12), INT8_C(21), -INT8_C(25), INT8_C(66) }, + { -INT16_C(19122), INT16_C(22585), -INT16_C(27804), INT16_C(29397), INT16_C(22056), -INT16_C(11990), -INT16_C(13512), INT16_C(30152) } }, + { { -INT16_C(7538), INT16_C(25602), INT16_C(24299), -INT16_C(11918), INT16_C(3229), -INT16_C(18055), INT16_C(32599), -INT16_C(15279) }, + { -INT8_C(107), INT8_C(66), -INT8_C(115), -INT8_C(100), INT8_C(98), -INT8_C(32), -INT8_C(13), INT8_C(39), + INT8_C(55), -INT8_C(47), -INT8_C(35), INT8_C(28), INT8_C(38), INT8_C(110), INT8_C(44), -INT8_C(29) }, + { INT8_C(116), -INT8_C(101), INT8_C(109), INT8_C(75), INT8_C(39), INT8_C(23), INT8_C(19), -INT8_C(77), + INT8_C(95), -INT8_C(18), -INT8_C(74), INT8_C(35), -INT8_C(113), -INT8_C(79), INT8_C(29), -INT8_C(29) }, + { -INT16_C(7498), INT16_C(25631), INT16_C(24338), -INT16_C(11911), INT16_C(3380), -INT16_C(17866), INT16_C(32614), -INT16_C(15279) } }, + { { INT16_C(27758), INT16_C(22488), INT16_C(6395), INT16_C(25945), INT16_C(28102), -INT16_C(14137), INT16_C(28301), INT16_C(24371) }, + { -INT8_C(63), INT8_C(54), -INT8_C(112), INT8_C(55), INT8_C(99), -INT8_C(70), -INT8_C(84), -INT8_C(116), + -INT8_C(117), -INT8_C(30), INT8_C(42), -INT8_C(45), -INT8_C(12), -INT8_C(3), -INT8_C(80), INT8_C(54) }, + { INT8_C(112), -INT8_C(126), -INT8_C(94), -INT8_C(123), INT8_C(122), INT8_C(16), -INT8_C(23), INT8_C(28), + INT8_C(74), -INT8_C(79), INT8_C(125), -INT8_C(71), -INT8_C(90), -INT8_C(93), -INT8_C(63), INT8_C(115) }, + { INT16_C(27949), INT16_C(22537), INT16_C(6478), INT16_C(25971), INT16_C(28180), -INT16_C(14047), INT16_C(28318), INT16_C(24432) } }, + { { -INT16_C(3775), INT16_C(28998), INT16_C(27693), -INT16_C(15256), INT16_C(28027), -INT16_C(32728), -INT16_C(23584), -INT16_C(9778) }, + { INT8_C(112), INT8_C(80), INT8_C(101), -INT8_C(10), INT8_C(21), 
-INT8_C(23), INT8_C(6), -INT8_C(101), + -INT8_C(83), -INT8_C(50), INT8_C(87), -INT8_C(58), -INT8_C(37), INT8_C(15), INT8_C(55), -INT8_C(63) }, + { -INT8_C(7), -INT8_C(3), INT8_C(31), INT8_C(94), -INT8_C(103), INT8_C(23), INT8_C(17), INT8_C(62), + -INT8_C(15), INT8_C(46), INT8_C(4), -INT8_C(104), INT8_C(30), INT8_C(51), -INT8_C(38), INT8_C(9) }, + { -INT16_C(3707), INT16_C(29094), INT16_C(27776), -INT16_C(15210), INT16_C(28094), -INT16_C(32692), -INT16_C(23491), -INT16_C(9706) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); + simde_int16x8_t r = simde_vabal_high_s8(a, b, c); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} +static int +test_simde_vabal_high_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t c[8]; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C(179829637), INT32_C(188529379), INT32_C(288350722), -INT32_C(517571286) }, + { -INT16_C(13612), -INT16_C(28223), -INT16_C(7027), -INT16_C(15233), -INT16_C(10290), -INT16_C(24042), INT16_C(23927), -INT16_C(25125) }, + { -INT16_C(784), INT16_C(28117), INT16_C(21530), INT16_C(9983), -INT16_C(13365), INT16_C(4343), -INT16_C(30122), INT16_C(21887) }, + { -INT32_C(179826562), INT32_C(188557764), INT32_C(288404771), -INT32_C(517524274) } }, + { { INT32_C(196428168), INT32_C(174905394), -INT32_C(1357007296), INT32_C(1621511496) }, + { INT16_C(18263), -INT16_C(32488), INT16_C(395), INT16_C(1235), INT16_C(28962), INT16_C(31407), -INT16_C(15134), INT16_C(17014) }, + { INT16_C(17062), INT16_C(24153), -INT16_C(7606), INT16_C(17212), -INT16_C(7826), -INT16_C(6303), -INT16_C(11693), INT16_C(17372) }, + { INT32_C(196464956), INT32_C(174943104), -INT32_C(1357003855), INT32_C(1621511854) } }, + { { -INT32_C(804161572), 
-INT32_C(1193738346), INT32_C(429598036), INT32_C(136395532) }, + { INT16_C(31912), INT16_C(22293), INT16_C(25268), -INT16_C(32551), INT16_C(1997), -INT16_C(651), INT16_C(14691), -INT16_C(14518) }, + { INT16_C(1671), -INT16_C(9415), INT16_C(5287), -INT16_C(19075), INT16_C(14489), -INT16_C(16213), INT16_C(21249), -INT16_C(20147) }, + { -INT32_C(804149080), -INT32_C(1193722784), INT32_C(429604594), INT32_C(136401161) } }, + { { INT32_C(1486856531), INT32_C(1112849963), -INT32_C(4448778), INT32_C(1472436894) }, + { INT16_C(19724), INT16_C(28219), -INT16_C(9804), INT16_C(18085), INT16_C(31900), -INT16_C(16435), INT16_C(9764), -INT16_C(29100) }, + { -INT16_C(23372), -INT16_C(30253), -INT16_C(23622), -INT16_C(27362), -INT16_C(18517), -INT16_C(32112), -INT16_C(17103), -INT16_C(23338) }, + { INT32_C(1486906948), INT32_C(1112865640), -INT32_C(4421911), INT32_C(1472442656) } }, + { { INT32_C(529456900), -INT32_C(112049144), INT32_C(2082647990), INT32_C(104254184) }, + { INT16_C(26185), -INT16_C(9328), -INT16_C(19245), INT16_C(20334), -INT16_C(7030), INT16_C(9092), -INT16_C(16424), -INT16_C(12426) }, + { -INT16_C(25020), -INT16_C(7843), -INT16_C(21867), INT16_C(32706), INT16_C(9564), INT16_C(28046), -INT16_C(19223), -INT16_C(25715) }, + { INT32_C(529473494), -INT32_C(112030190), INT32_C(2082650789), INT32_C(104267473) } }, + { { -INT32_C(2038153826), INT32_C(348526704), -INT32_C(1695251148), INT32_C(309568932) }, + { -INT16_C(21078), -INT16_C(24096), -INT16_C(12069), INT16_C(7162), -INT16_C(5108), INT16_C(26854), INT16_C(32114), INT16_C(29269) }, + { INT16_C(30475), -INT16_C(12380), INT16_C(22264), INT16_C(12142), INT16_C(10088), -INT16_C(32109), INT16_C(17963), -INT16_C(15000) }, + { -INT32_C(2038138630), INT32_C(348585667), -INT32_C(1695236997), INT32_C(309613201) } }, + { { INT32_C(761698564), -INT32_C(426557488), INT32_C(1608948529), INT32_C(1963604586) }, + { -INT16_C(8301), -INT16_C(19858), -INT16_C(29881), INT16_C(21354), INT16_C(19475), -INT16_C(10728), INT16_C(2436), 
-INT16_C(31490) }, + { -INT16_C(14416), -INT16_C(20272), -INT16_C(12400), INT16_C(7982), -INT16_C(19487), INT16_C(11576), INT16_C(19964), -INT16_C(15614) }, + { INT32_C(761737526), -INT32_C(426535184), INT32_C(1608966057), INT32_C(1963620462) } }, + { { -INT32_C(1630703034), -INT32_C(1886177776), -INT32_C(852485065), -INT32_C(951651993) }, + { INT16_C(23651), INT16_C(7004), -INT16_C(656), -INT16_C(6092), -INT16_C(28478), -INT16_C(12117), -INT16_C(5892), -INT16_C(29227) }, + { INT16_C(32449), -INT16_C(21110), -INT16_C(1208), -INT16_C(16097), INT16_C(152), INT16_C(3396), INT16_C(20552), INT16_C(23176) }, + { -INT32_C(1630674404), -INT32_C(1886162263), -INT32_C(852458621), -INT32_C(951599590) } }, + { { -INT32_C(1596752989), INT32_C(2103359502), INT32_C(57581753), -INT32_C(222500531) }, + { INT16_C(111), INT16_C(2594), -INT16_C(22361), -INT16_C(15272), INT16_C(18492), -INT16_C(4773), INT16_C(31736), INT16_C(13386) }, + { -INT16_C(3931), -INT16_C(27736), -INT16_C(15212), INT16_C(19636), INT16_C(19625), -INT16_C(13211), -INT16_C(26545), INT16_C(30610) }, + { -INT32_C(1596751856), INT32_C(2103367940), INT32_C(57640034), -INT32_C(222483307) } }, + { { -INT32_C(607395563), -INT32_C(409695611), -INT32_C(744659695), INT32_C(458233591) }, + { -INT16_C(20088), -INT16_C(26599), -INT16_C(1512), INT16_C(18133), -INT16_C(31527), -INT16_C(22654), -INT16_C(16130), INT16_C(16565) }, + { -INT16_C(5175), INT16_C(16754), INT16_C(27895), INT16_C(30779), INT16_C(12703), -INT16_C(16165), INT16_C(31881), INT16_C(3404) }, + { -INT32_C(607351333), -INT32_C(409689122), -INT32_C(744611684), INT32_C(458246752) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); + simde_int32x4_t r = simde_vabal_high_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + 
return 0; +} + +static int +test_simde_vabal_high_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t c[4]; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C(8335084918904227682), INT64_C(5082616870088972360) }, + { -INT32_C(1203938446), -INT32_C(1379772492), INT32_C(616994359), INT32_C(1083545082) }, + { -INT32_C(1906729359), INT32_C(1015093241), INT32_C(425590947), INT32_C(2029372009) }, + { -INT64_C(8335084918712824270), INT64_C(5082616871034799287) } }, + { { -INT64_C(6692201385341432215), INT64_C(6130272179971188291) }, + { -INT32_C(57658811), INT32_C(326008941), INT32_C(1741884817), -INT32_C(684168561) }, + { -INT32_C(970266010), -INT32_C(1878151217), INT32_C(728958119), INT32_C(18297972) }, + { -INT64_C(6692201384328505517), INT64_C(6130272180673654824) } }, + { { -INT64_C(3228547053428818110), -INT64_C(3775920480530457269) }, + { -INT32_C(1896729348), -INT32_C(1340240637), -INT32_C(252156445), INT32_C(1199317340) }, + { INT32_C(1266695928), INT32_C(1007266895), INT32_C(1686726418), INT32_C(2083350046) }, + { -INT64_C(3228547051489935247), -INT64_C(3775920479646424563) } }, + { { -INT64_C(5079326313705028170), INT64_C(509341014050276175) }, + { -INT32_C(2012068063), -INT32_C(1347573040), -INT32_C(750124298), INT32_C(1242375513) }, + { INT32_C(1262690307), INT32_C(1793471940), -INT32_C(1987031715), -INT32_C(1187341857) }, + { -INT64_C(5079326312468120753), INT64_C(509341016479993545) } }, + { { -INT64_C(910957854149526510), -INT64_C(1899348721453545205) }, + { -INT32_C(1550797276), -INT32_C(878055843), -INT32_C(1270476620), -INT32_C(1674582391) }, + { -INT32_C(720711827), -INT32_C(303966234), -INT32_C(1744041663), INT32_C(44808599) }, + { -INT64_C(910957853675961467), -INT64_C(1899348719734154215) } }, + { { INT64_C(1577253342371792335), -INT64_C(555804675587586129) }, + { -INT32_C(320249636), INT32_C(1372770730), -INT32_C(177109545), INT32_C(579434157) }, + { INT32_C(599026848), -INT32_C(36369713), 
INT32_C(685560212), -INT32_C(990132316) }, + { INT64_C(1577253343234462092), -INT64_C(555804674018019656) } }, + { { -INT64_C(1253293116004059154), INT64_C(3646315862760396425) }, + { INT32_C(1622631446), INT32_C(355513214), INT32_C(437168953), INT32_C(218678106) }, + { -INT32_C(209040040), INT32_C(534132610), -INT32_C(666122681), INT32_C(337309625) }, + { -INT64_C(1253293114900767520), INT64_C(3646315862879027944) } }, + { { INT64_C(2045382784511940269), INT64_C(6639743016461780648) }, + { -INT32_C(365809482), INT32_C(468234551), -INT32_C(1114166296), INT32_C(945962830) }, + { -INT32_C(2036305755), INT32_C(147964901), INT32_C(401209098), INT32_C(989423590) }, + { INT64_C(2045382786027315663), INT64_C(6639743016505241408) } }, + { { INT64_C(3621735817771913007), -INT64_C(19392873471066367) }, + { -INT32_C(563773313), INT32_C(1015072499), -INT32_C(938101529), INT32_C(184654273) }, + { -INT32_C(1212023151), -INT32_C(1740549979), INT32_C(585644788), INT32_C(2081175347) }, + { INT64_C(3621735819295659324), -INT64_C(19392871574545293) } }, + { { INT64_C(948366791356237313), INT64_C(8912436025019436909) }, + { INT32_C(420891144), INT32_C(1780120745), -INT32_C(219505428), INT32_C(1015328086) }, + { INT32_C(292113003), -INT32_C(2041758122), -INT32_C(1731207938), -INT32_C(1516599938) }, + { INT64_C(948366792867939823), INT64_C(8912436027551364933) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); + simde_int64x2_t r = simde_vabal_high_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_high_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint8_t b[16]; + uint8_t c[16]; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C(38828), UINT16_C(41435), UINT16_C(57053), 
UINT16_C(49725), UINT16_C(5202), UINT16_C(59645), UINT16_C(41047), UINT16_C(45717) }, + { UINT8_C(13), UINT8_C(113), UINT8_C(222), UINT8_C(170), UINT8_C(90), UINT8_C(239), UINT8_C(232), UINT8_C(191), + UINT8_C(145), UINT8_C(156), UINT8_C(183), UINT8_C(214), UINT8_C(44), UINT8_C(59), UINT8_C(171), UINT8_C(161) }, + { UINT8_C(44), UINT8_C(117), UINT8_C(34), UINT8_C(97), UINT8_C(162), UINT8_C(73), UINT8_C(214), UINT8_C(138), + UINT8_C(140), UINT8_C(240), UINT8_C(5), UINT8_C(84), UINT8_C(106), UINT8_C(130), UINT8_C(252), UINT8_C(155) }, + { UINT16_C(38833), UINT16_C(41519), UINT16_C(57231), UINT16_C(49855), UINT16_C(5264), UINT16_C(59716), UINT16_C(41128), UINT16_C(45723) } }, + { { UINT16_C(55952), UINT16_C(63200), UINT16_C(36745), UINT16_C(50405), UINT16_C(17176), UINT16_C(54849), UINT16_C(61966), UINT16_C(28342) }, + { UINT8_C(200), UINT8_C(221), UINT8_C(187), UINT8_C(41), UINT8_C(3), UINT8_C(14), UINT8_C(20), UINT8_C(8), + UINT8_C(131), UINT8_C(18), UINT8_C(68), UINT8_C(156), UINT8_C(53), UINT8_C(243), UINT8_C(97), UINT8_C(145) }, + { UINT8_C(205), UINT8_C(31), UINT8_C(190), UINT8_C(113), UINT8_C(237), UINT8_C(252), UINT8_C(169), UINT8_C(111), + UINT8_C(37), UINT8_C(167), UINT8_C(95), UINT8_C(94), UINT8_C(85), UINT8_C(6), UINT8_C(137), UINT8_C(183) }, + { UINT16_C(56046), UINT16_C(63349), UINT16_C(36772), UINT16_C(50467), UINT16_C(17208), UINT16_C(55086), UINT16_C(62006), UINT16_C(28380) } }, + { { UINT16_C(44990), UINT16_C(61661), UINT16_C(22479), UINT16_C(38536), UINT16_C(28216), UINT16_C(21945), UINT16_C(14021), UINT16_C(33738) }, + { UINT8_C(204), UINT8_C(55), UINT8_C(32), UINT8_C(29), UINT8_C(239), UINT8_C(201), UINT8_C(173), UINT8_C(204), + UINT8_C(163), UINT8_C(193), UINT8_C(131), UINT8_C(174), UINT8_C(42), UINT8_C(209), UINT8_C(212), UINT8_C(166) }, + { UINT8_C(196), UINT8_C(175), UINT8_C(189), UINT8_C(6), UINT8_C(157), UINT8_C(244), UINT8_C(102), UINT8_C(191), + UINT8_C(82), UINT8_C(123), UINT8_C(60), UINT8_C(158), UINT8_C(166), UINT8_C(238), UINT8_C(152), 
UINT8_C(171) }, + { UINT16_C(45071), UINT16_C(61731), UINT16_C(22550), UINT16_C(38552), UINT16_C(28340), UINT16_C(21974), UINT16_C(14081), UINT16_C(33743) } }, + { { UINT16_C(55344), UINT16_C(64793), UINT16_C(22239), UINT16_C(26428), UINT16_C(29945), UINT16_C(1297), UINT16_C(63634), UINT16_C(25808) }, + { UINT8_C(68), UINT8_C(230), UINT8_C(47), UINT8_C(106), UINT8_C(197), UINT8_C(220), UINT8_C(115), UINT8_C(241), + UINT8_C(49), UINT8_C(9), UINT8_C(120), UINT8_C(237), UINT8_C(254), UINT8_C(59), UINT8_C(71), UINT8_C(181) }, + { UINT8_C(217), UINT8_C(139), UINT8_C(42), UINT8_C(202), UINT8_C(199), UINT8_C(158), UINT8_C(239), UINT8_C(209), + UINT8_C(84), UINT8_C(18), UINT8_C(124), UINT8_C(109), UINT8_C(247), UINT8_C(168), UINT8_C(192), UINT8_C(60) }, + { UINT16_C(55379), UINT16_C(64802), UINT16_C(22243), UINT16_C(26556), UINT16_C(29952), UINT16_C(1406), UINT16_C(63755), UINT16_C(25929) } }, + { { UINT16_C(41863), UINT16_C(6642), UINT16_C(38692), UINT16_C(32585), UINT16_C(24700), UINT16_C(3802), UINT16_C(7553), UINT16_C(17842) }, + { UINT8_C(253), UINT8_C(99), UINT8_C(11), UINT8_C(124), UINT8_C(27), UINT8_C(148), UINT8_C(242), UINT8_C(94), + UINT8_C(95), UINT8_C(134), UINT8_C(228), UINT8_C(48), UINT8_C(248), UINT8_C(20), UINT8_C(129), UINT8_C(123) }, + { UINT8_C(89), UINT8_C(1), UINT8_C(16), UINT8_C(105), UINT8_C(150), UINT8_C(24), UINT8_C(245), UINT8_C(53), + UINT8_C(183), UINT8_C(143), UINT8_C(56), UINT8_C(241), UINT8_C(187), UINT8_C(109), UINT8_C(103), UINT8_C(209) }, + { UINT16_C(41951), UINT16_C(6651), UINT16_C(38864), UINT16_C(32778), UINT16_C(24761), UINT16_C(3891), UINT16_C(7579), UINT16_C(17928) } }, + { { UINT16_C(10149), UINT16_C(11963), UINT16_C(9875), UINT16_C(34112), UINT16_C(15679), UINT16_C(6977), UINT16_C(13520), UINT16_C(53297) }, + { UINT8_C(104), UINT8_C(180), UINT8_C(15), UINT8_C(130), UINT8_C(179), UINT8_C(51), UINT8_C(122), UINT8_C(216), + UINT8_C(207), UINT8_C(103), UINT8_C(250), UINT8_C(192), UINT8_C(70), UINT8_C(209), UINT8_C(100), UINT8_C(170) 
}, + { UINT8_C(121), UINT8_C(12), UINT8_C(116), UINT8_C(114), UINT8_C(198), UINT8_C(162), UINT8_C(89), UINT8_C(79), + UINT8_C(121), UINT8_C(155), UINT8_C(154), UINT8_C(134), UINT8_C(254), UINT8_C(120), UINT8_C(202), UINT8_C(13) }, + { UINT16_C(10235), UINT16_C(12015), UINT16_C(9971), UINT16_C(34170), UINT16_C(15863), UINT16_C(7066), UINT16_C(13622), UINT16_C(53454) } }, + { { UINT16_C(6132), UINT16_C(31154), UINT16_C(19323), UINT16_C(30639), UINT16_C(16383), UINT16_C(35340), UINT16_C(59095), UINT16_C(40893) }, + { UINT8_C(244), UINT8_C(209), UINT8_C(119), UINT8_C(37), UINT8_C(120), UINT8_C(243), UINT8_C(211), UINT8_C(149), + UINT8_C(228), UINT8_C(12), UINT8_C(80), UINT8_C(207), UINT8_C(62), UINT8_C(235), UINT8_C(198), UINT8_C(132) }, + { UINT8_C(193), UINT8_C(206), UINT8_C(53), UINT8_C(55), UINT8_C(83), UINT8_C(195), UINT8_C(13), UINT8_C(119), + UINT8_C(26), UINT8_C(137), UINT8_C(196), UINT8_C(31), UINT8_C(197), UINT8_C(204), UINT8_C(248), UINT8_C(216) }, + { UINT16_C(6334), UINT16_C(31279), UINT16_C(19439), UINT16_C(30815), UINT16_C(16518), UINT16_C(35371), UINT16_C(59145), UINT16_C(40977) } }, + { { UINT16_C(50996), UINT16_C(28664), UINT16_C(52064), UINT16_C(19134), UINT16_C(21713), UINT16_C(53912), UINT16_C(4270), UINT16_C(50615) }, + { UINT8_C(230), UINT8_C(123), UINT8_C(149), UINT8_C(44), UINT8_C(186), UINT8_C(199), UINT8_C(103), UINT8_C(192), + UINT8_C(183), UINT8_C(132), UINT8_C(167), UINT8_C(198), UINT8_C(201), UINT8_C(208), UINT8_C(173), UINT8_C(39) }, + { UINT8_C(143), UINT8_C(130), UINT8_C(94), UINT8_C(50), UINT8_C(151), UINT8_C(238), UINT8_C(186), UINT8_C(198), + UINT8_C(196), UINT8_C(174), UINT8_C(44), UINT8_C(25), UINT8_C(23), UINT8_C(53), UINT8_C(237), UINT8_C(205) }, + { UINT16_C(51009), UINT16_C(28706), UINT16_C(52187), UINT16_C(19307), UINT16_C(21891), UINT16_C(54067), UINT16_C(4334), UINT16_C(50781) } }, + { { UINT16_C(12283), UINT16_C(3982), UINT16_C(34342), UINT16_C(51198), UINT16_C(55355), UINT16_C(56804), UINT16_C(15883), UINT16_C(62019) }, + 
{ UINT8_C(226), UINT8_C(164), UINT8_C(115), UINT8_C(124), UINT8_C(69), UINT8_C(123), UINT8_C(127), UINT8_C(253), + UINT8_C(253), UINT8_C(246), UINT8_C(8), UINT8_C(45), UINT8_C(80), UINT8_C(7), UINT8_C(105), UINT8_C(99) }, + { UINT8_C(246), UINT8_C(12), UINT8_C(48), UINT8_C(245), UINT8_C(253), UINT8_C(204), UINT8_C(22), UINT8_C(52), + UINT8_C(159), UINT8_C(190), UINT8_C(38), UINT8_C(217), UINT8_C(235), UINT8_C(157), UINT8_C(210), UINT8_C(216) }, + { UINT16_C(12377), UINT16_C(4038), UINT16_C(34372), UINT16_C(51370), UINT16_C(55510), UINT16_C(56954), UINT16_C(15988), UINT16_C(62136) } }, + { { UINT16_C(61236), UINT16_C(244), UINT16_C(60163), UINT16_C(46101), UINT16_C(62573), UINT16_C(55461), UINT16_C(53515), UINT16_C(49904) }, + { UINT8_C(245), UINT8_C(137), UINT8_C(119), UINT8_C(134), UINT8_C(62), UINT8_C(113), UINT8_C(252), UINT8_C(228), + UINT8_C(242), UINT8_C(150), UINT8_C(233), UINT8_C(138), UINT8_C(70), UINT8_C(47), UINT8_C(205), UINT8_C(181) }, + { UINT8_C(27), UINT8_C(86), UINT8_C(117), UINT8_C(243), UINT8_C(153), UINT8_C(37), UINT8_C(32), UINT8_C(64), + UINT8_C(24), UINT8_C(234), UINT8_C(152), UINT8_C(40), UINT8_C(105), UINT8_C(251), UINT8_C(177), UINT8_C(226) }, + { UINT16_C(61454), UINT16_C(328), UINT16_C(60244), UINT16_C(46199), UINT16_C(62608), UINT16_C(55665), UINT16_C(53543), UINT16_C(49949) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); + simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); + simde_uint16x8_t r = simde_vabal_high_u8(a, b, c); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_high_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t b[8]; + uint16_t c[8]; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C(4090475893), UINT32_C(1066812000), UINT32_C(1297469008), UINT32_C(1097358955) 
}, + { UINT16_C(17093), UINT16_C(38664), UINT16_C(36237), UINT16_C(33187), UINT16_C(32319), UINT16_C(11451), UINT16_C(23995), UINT16_C(8163) }, + { UINT16_C(63374), UINT16_C(21882), UINT16_C(59381), UINT16_C(39509), UINT16_C(52230), UINT16_C(38063), UINT16_C(38488), UINT16_C(41691) }, + { UINT32_C(4090495804), UINT32_C(1066838612), UINT32_C(1297483501), UINT32_C(1097392483) } }, + { { UINT32_C(3668143220), UINT32_C(837778028), UINT32_C(2568867907), UINT32_C(729070020) }, + { UINT16_C(45026), UINT16_C(15729), UINT16_C(25318), UINT16_C(12934), UINT16_C(51238), UINT16_C(48120), UINT16_C(22733), UINT16_C(65250) }, + { UINT16_C(54573), UINT16_C(28945), UINT16_C(18364), UINT16_C(26218), UINT16_C(43985), UINT16_C(27431), UINT16_C(56699), UINT16_C(11810) }, + { UINT32_C(3668150473), UINT32_C(837798717), UINT32_C(2568901873), UINT32_C(729123460) } }, + { { UINT32_C(3300693276), UINT32_C(1956001735), UINT32_C(2747524670), UINT32_C(4227946335) }, + { UINT16_C(41385), UINT16_C(47036), UINT16_C(37811), UINT16_C(27893), UINT16_C(19015), UINT16_C(25007), UINT16_C(40744), UINT16_C(52349) }, + { UINT16_C(55323), UINT16_C(58978), UINT16_C(48127), UINT16_C(2594), UINT16_C(28665), UINT16_C(34424), UINT16_C(40506), UINT16_C(4345) }, + { UINT32_C(3300702926), UINT32_C(1956011152), UINT32_C(2747524908), UINT32_C(4227994339) } }, + { { UINT32_C(2768463207), UINT32_C(1864318329), UINT32_C(1838739674), UINT32_C(3432191474) }, + { UINT16_C(35878), UINT16_C(57703), UINT16_C(15022), UINT16_C(62453), UINT16_C(6667), UINT16_C(55757), UINT16_C(11030), UINT16_C(17475) }, + { UINT16_C(707), UINT16_C(26315), UINT16_C(34468), UINT16_C(38756), UINT16_C(41507), UINT16_C(11665), UINT16_C(27993), UINT16_C(20345) }, + { UINT32_C(2768498047), UINT32_C(1864362421), UINT32_C(1838756637), UINT32_C(3432194344) } }, + { { UINT32_C(1367580523), UINT32_C(1433377796), UINT32_C(2018433319), UINT32_C(406237960) }, + { UINT16_C(39728), UINT16_C(62750), UINT16_C(33428), UINT16_C(43186), UINT16_C(29864), 
UINT16_C(46544), UINT16_C(40587), UINT16_C(64161) }, + { UINT16_C(16852), UINT16_C(14073), UINT16_C(2481), UINT16_C(22644), UINT16_C(35281), UINT16_C(36062), UINT16_C(31543), UINT16_C(12317) }, + { UINT32_C(1367585940), UINT32_C(1433388278), UINT32_C(2018442363), UINT32_C(406289804) } }, + { { UINT32_C(535887558), UINT32_C(710011742), UINT32_C(4222389537), UINT32_C(135378958) }, + { UINT16_C(11914), UINT16_C(29816), UINT16_C(13942), UINT16_C(20803), UINT16_C(42772), UINT16_C(22249), UINT16_C(26254), UINT16_C(39349) }, + { UINT16_C(35125), UINT16_C(47647), UINT16_C(20892), UINT16_C(31846), UINT16_C(44144), UINT16_C(5008), UINT16_C(16465), UINT16_C(13907) }, + { UINT32_C(535888930), UINT32_C(710028983), UINT32_C(4222399326), UINT32_C(135404400) } }, + { { UINT32_C(1614503899), UINT32_C(1097701245), UINT32_C(3094653303), UINT32_C(848163008) }, + { UINT16_C(64585), UINT16_C(16113), UINT16_C(36513), UINT16_C(33589), UINT16_C(41213), UINT16_C(59878), UINT16_C(3781), UINT16_C(20408) }, + { UINT16_C(50935), UINT16_C(37934), UINT16_C(636), UINT16_C(36665), UINT16_C(33898), UINT16_C(48284), UINT16_C(7063), UINT16_C(45712) }, + { UINT32_C(1614511214), UINT32_C(1097712839), UINT32_C(3094656585), UINT32_C(848188312) } }, + { { UINT32_C(3669890396), UINT32_C(3210809506), UINT32_C(1506732486), UINT32_C(3654965237) }, + { UINT16_C(13159), UINT16_C(10302), UINT16_C(11123), UINT16_C(24510), UINT16_C(57262), UINT16_C(12154), UINT16_C(28736), UINT16_C(14952) }, + { UINT16_C(54972), UINT16_C(1380), UINT16_C(37704), UINT16_C(9622), UINT16_C(22294), UINT16_C(44461), UINT16_C(8278), UINT16_C(18143) }, + { UINT32_C(3669925364), UINT32_C(3210841813), UINT32_C(1506752944), UINT32_C(3654968428) } }, + { { UINT32_C(3702726748), UINT32_C(1812028967), UINT32_C(2443954047), UINT32_C(2042439423) }, + { UINT16_C(57236), UINT16_C(65112), UINT16_C(35778), UINT16_C(47229), UINT16_C(1331), UINT16_C(1773), UINT16_C(22220), UINT16_C(64020) }, + { UINT16_C(64502), UINT16_C(1794), UINT16_C(41140), 
UINT16_C(9692), UINT16_C(63262), UINT16_C(3177), UINT16_C(1880), UINT16_C(33419) }, + { UINT32_C(3702788679), UINT32_C(1812030371), UINT32_C(2443974387), UINT32_C(2042470024) } }, + { { UINT32_C(4151596664), UINT32_C(2987569361), UINT32_C(3519196835), UINT32_C(110598850) }, + { UINT16_C(32606), UINT16_C(40627), UINT16_C(19309), UINT16_C(19390), UINT16_C(41508), UINT16_C(39509), UINT16_C(35838), UINT16_C(48754) }, + { UINT16_C(22932), UINT16_C(34644), UINT16_C(52343), UINT16_C(7316), UINT16_C(63638), UINT16_C(33535), UINT16_C(16015), UINT16_C(40245) }, + { UINT32_C(4151618794), UINT32_C(2987575335), UINT32_C(3519216658), UINT32_C(110607359) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); + simde_uint32x4_t r = simde_vabal_high_u16(a, b, c); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vabal_high_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t b[4]; + uint32_t c[4]; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C(9149083788126462255), UINT64_C(7494071422769111859) }, + { UINT32_C(2041335781), UINT32_C(1367478240), UINT32_C(2321114513), UINT32_C(2218486805) }, + { UINT32_C(3958228527), UINT32_C(2693854851), UINT32_C(2017434103), UINT32_C(6484540) }, + { UINT64_C(9149083788430142665), UINT64_C(7494071424981114124) } }, + { { UINT64_C(4423053941100656189), UINT64_C(990907171616478125) }, + { UINT32_C(3796362156), UINT32_C(2589156196), UINT32_C(1422143894), UINT32_C(2696813919) }, + { UINT32_C(867892066), UINT32_C(3702295027), UINT32_C(127003855), UINT32_C(2983215031) }, + { UINT64_C(4423053942395796228), UINT64_C(990907171902879237) } }, + { { UINT64_C(7329956104464289737), UINT64_C(72758202737254580) }, + { UINT32_C(4037095348), UINT32_C(2706697874), 
UINT32_C(3469671657), UINT32_C(127303580) }, + { UINT32_C(4062092470), UINT32_C(120010243), UINT32_C(3745676276), UINT32_C(3592643144) }, + { UINT64_C(7329956104740294356), UINT64_C(72758206202594144) } }, + { { UINT64_C(5206029364576848437), UINT64_C(5475027404289764645) }, + { UINT32_C(1399070209), UINT32_C(3099532744), UINT32_C(227346930), UINT32_C(1264826698) }, + { UINT32_C(1448391272), UINT32_C(463258493), UINT32_C(3154072378), UINT32_C(397004138) }, + { UINT64_C(5206029367503573885), UINT64_C(5475027405157587205) } }, + { { UINT64_C(4323526344265291552), UINT64_C(7260474860496992020) }, + { UINT32_C(645283633), UINT32_C(3657604684), UINT32_C(2091440748), UINT32_C(1590070019) }, + { UINT32_C(285906177), UINT32_C(3523096247), UINT32_C(2690720407), UINT32_C(4020741804) }, + { UINT64_C(4323526344864571211), UINT64_C(7260474862927663805) } }, + { { UINT64_C(1272135283094720421), UINT64_C(6294270365765646379) }, + { UINT32_C(1551131813), UINT32_C(212006481), UINT32_C(4051741288), UINT32_C(3679029671) }, + { UINT32_C(3027198743), UINT32_C(1339664388), UINT32_C(586741341), UINT32_C(1014799411) }, + { UINT64_C(1272135286559720368), UINT64_C(6294270368429876639) } }, + { { UINT64_C(1848326113476440136), UINT64_C(3301212301176947689) }, + { UINT32_C(3038796523), UINT32_C(571428203), UINT32_C(3729735983), UINT32_C(3044120808) }, + { UINT32_C(3747235367), UINT32_C(3338236402), UINT32_C(3213814495), UINT32_C(1562834250) }, + { UINT64_C(1848326113992361624), UINT64_C(3301212302658234247) } }, + { { UINT64_C(4819464941550472272), UINT64_C(769069127010791191) }, + { UINT32_C(1372130262), UINT32_C(3737707021), UINT32_C(2347251705), UINT32_C(1449293230) }, + { UINT32_C(2853048715), UINT32_C(4169309797), UINT32_C(3631039774), UINT32_C(1783795855) }, + { UINT64_C(4819464942834260341), UINT64_C(769069127345293816) } }, + { { UINT64_C(899408302338136502), UINT64_C(4465131751387657149) }, + { UINT32_C(1832530217), UINT32_C(2112373050), UINT32_C(2047246013), UINT32_C(3014076926) }, 
+ { UINT32_C(2731895957), UINT32_C(2190050819), UINT32_C(3673257953), UINT32_C(1089696283) }, + { UINT64_C(899408303964148442), UINT64_C(4465131753312037792) } }, + { { UINT64_C(9143533208000454769), UINT64_C(5199679203301077444) }, + { UINT32_C(2745288759), UINT32_C(3266596383), UINT32_C(2141100959), UINT32_C(850119357) }, + { UINT32_C(3889231510), UINT32_C(3931186667), UINT32_C(3102691051), UINT32_C(2429610128) }, + { UINT64_C(9143533208962044861), UINT64_C(5199679204880568215) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); + simde_uint64x2_t r = simde_vabal_high_u32(a, b, c); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_high_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_high_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_high_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_high_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabal_high_u32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/add_testgen.py b/test/arm/neon/add_testgen.py new file mode 100644 index 000000000..66d6ad5ad --- /dev/null +++ b/test/arm/neon/add_testgen.py @@ -0,0 +1,114 @@ +import os +import re +import csv + + +def gen_test(v_type_list, v_ele_list, v_name_list, func_name): + print(v_type_list) + print(v_ele_list) + print(v_name_list) + test_content = ''' +#else + fputc('\\n', stdout); + for (int i = 0 ; i < 8 ; i++) {\n''' + for i in range(len(v_type_list)-1): + test_content = test_content+' simde_'+v_type_list[i]+v_ele_list[i]+'_t '+v_name_list[i]+' = simde_test_arm_neon_random_'+v_type_list[i][0]+v_ele_list[i]+'();\n' + test_content = test_content+' simde_'+v_type_list[-1]+v_ele_list[-1]+'_t '+v_name_list[-1]+' = 
'+func_name+'(' + for i in range(len(v_name_list)-1): + if i != len(v_name_list)-2: + test_content = test_content+v_name_list[i]+', ' + else: + test_content = test_content+v_name_list[i]+');\n\n' + + for i in range(len(v_name_list)): + if i == 0: + test_content = test_content + ' simde_test_arm_neon_write_'+v_type_list[i][0]+v_ele_list[i]+'(2, '+v_name_list[i]+', SIMDE_TEST_VEC_POS_FIRST);\n' + elif i == len(v_name_list)-1: + test_content = test_content + ' simde_test_arm_neon_write_'+v_type_list[i][0]+v_ele_list[i]+'(2, '+v_name_list[i]+', SIMDE_TEST_VEC_POS_LAST);\n' + else: + test_content = test_content + ' simde_test_arm_neon_write_'+v_type_list[i][0]+v_ele_list[i]+'(2, '+v_name_list[i]+', SIMDE_TEST_VEC_POS_MIDDLE);\n' + test_content = test_content + ' }\n return 1;\n#endif\n' + + return test_content + + +type_list = [["float16"], + ["float32", "float"], + ["float64", "double"], + ["uint8"], + ["uint16"], + ["uint32", "unsigned int", "unsigned"], + ["uint64"], + ["int8"], + ["int16"], + ["int32", "int"], + ["int64"]] + +dic_type_list = {"float16":["float", "16"], + "float32":["float", "32"],} + +def main_gen(file_path): + # Open the file for reading + with open(file_path, 'r') as file: + lines = file.readlines() + for i in range(len(lines)): + if "static int" in lines[i]: + if "#if 1" not in lines[i+2]: + func_name = lines[i+1][5:lines[i+1].find(' ')] + lines.insert(i+2, '#if 1\n') + print(f"line numbers: {i}, {func_name}") + # get input para + v_type_list = [] # ex. float16 or uint32 + v_ele_list = [] # ex. 32x2 + v_name_list = [] # ex. 
a + for j in range(i, i+1000, 1): + if "struct" in lines[j]: + while 'test_vec' not in lines[j]: + j += 1 + # get type + found = False + variable_len = ['1'] + for rows in range(len(type_list)): + if not found: + for cols in range(len(type_list[rows])): + if type_list[rows][cols] in lines[j]: + v_type = '' + for c in type_list[rows][0]: + if c.isdigit(): + break + v_type += c + v_type_list.append(v_type) + found = True + variable_len = re.findall(r'\d+', type_list[rows][0]) + break + else: + break + # get elements + if '[' in lines[j] and '}' not in lines[j]: + v_ele_list.append(variable_len[0]+'x'+lines[j][lines[j].find('[')+1:lines[j].find(']')]) + v_name_list.append(lines[j][lines[j].rfind(' ')+1:lines[j].rfind('[')]) + elif '}' not in lines[j]: + v_ele_list.append(variable_len[0]+'x1') + v_name_list.append(lines[j][lines[j].rfind(' ')+1:lines[j].rfind(';')]) + if "return" in lines[j]: + # Add gen_test function + add_content = gen_test(v_type_list, v_ele_list, v_name_list, func_name) + lines.insert(j+1, add_content) + break + # Write the modified content back to the file + with open(file_path, 'w') as file: + file.writelines(lines) + + pass + + +if __name__ == '__main__': + # Open the modify_c.txt file and read its contents + with open('modify_c.txt', 'r') as modify_c_file: + file_names = modify_c_file.read().splitlines() + + for file_name in file_names: + print(f'Start {file_name}') + main_gen(file_name) + print(f'Done {file_name}') + diff --git a/test/arm/neon/cale.c b/test/arm/neon/cale.c new file mode 100644 index 000000000..f303c55a1 --- /dev/null +++ b/test/arm/neon/cale.c @@ -0,0 +1,518 @@ +#define SIMDE_TEST_ARM_NEON_INSN cale + +#include "test-neon.h" +#include "../../../simde/arm/neon/cale.h" + +static int +test_simde_vcaleh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 b; + simde_float32 a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT32_C( -68.07), + SIMDE_FLOAT32_C( -41.89), + UINT16_MAX }, + { SIMDE_FLOAT32_C( 
94.15), + SIMDE_FLOAT32_C( -23.64), + UINT16_MAX }, + { SIMDE_FLOAT32_C( -16.98), + SIMDE_FLOAT32_C( 36.07), + UINT16_C( 0) }, + { SIMDE_FLOAT32_C( 8.15), + SIMDE_FLOAT32_C( -14.55), + UINT16_C( 0) }, + { SIMDE_FLOAT32_C( -74.90), + SIMDE_FLOAT32_C( 20.98), + UINT16_MAX }, + { SIMDE_FLOAT32_C( -93.79), + SIMDE_FLOAT32_C( 28.02), + UINT16_MAX }, + { SIMDE_FLOAT32_C( -40.82), + SIMDE_FLOAT32_C( 11.37), + UINT16_MAX }, + { SIMDE_FLOAT32_C( 41.99), + SIMDE_FLOAT32_C( 40.71), + UINT16_MAX } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vcaleh_f16(simde_float16_from_float32(test_vec[i].a), simde_float16_from_float32(test_vec[i].b)); + + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_codegen_random_f32(-100.0f, 100.0f); + simde_float32_t b = simde_test_codegen_random_f32(-100.0f, 100.0f); + uint16_t r = simde_vcaleh_f16(simde_float16_from_float32(a), simde_float16_from_float32(b)); + + simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcales_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 b; + simde_float32 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_NANS) + { SIMDE_MATH_NANF, + SIMDE_FLOAT32_C( 0.52), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 705.02), + SIMDE_MATH_NANF, + UINT32_C( 0) }, + { SIMDE_MATH_NANF, + SIMDE_MATH_NANF, + UINT32_C( 0) }, + #endif + + { SIMDE_FLOAT32_C( 8.79), + SIMDE_FLOAT32_C( 792.83), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( -399.97), + SIMDE_FLOAT32_C( -256.84), + UINT32_MAX }, + { SIMDE_FLOAT32_C( 231.75), + SIMDE_FLOAT32_C( -411.54), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 864.59), + SIMDE_FLOAT32_C( -881.95), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 
-814.20), + SIMDE_FLOAT32_C( 479.81), + UINT32_MAX }, + { SIMDE_FLOAT32_C( 263.32), + SIMDE_FLOAT32_C( -797.51), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 321.47), + SIMDE_FLOAT32_C( -74.97), + UINT32_MAX }, + { SIMDE_FLOAT32_C( -57.92), + SIMDE_FLOAT32_C( 535.57), + UINT32_C( 0) } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint32_t r = simde_vcales_f32(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); + simde_float32_t b = simde_test_codegen_random_f32(-1000.0f, 1000.0f); + uint32_t r = simde_vcales_f32(a, b); + + simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaled_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 b; + simde_float64 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_NANS) + { SIMDE_MATH_NAN, + SIMDE_FLOAT64_C( 0.52), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 705.02), + SIMDE_MATH_NAN, + UINT64_C( 0) }, + { SIMDE_MATH_NAN, + SIMDE_MATH_NAN, + UINT64_C( 0) }, + #endif + + { SIMDE_FLOAT64_C( -111.66), + SIMDE_FLOAT64_C( -149.68), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( -365.17), + SIMDE_FLOAT64_C( -219.70), + UINT64_MAX }, + { SIMDE_FLOAT64_C( -45.32), + SIMDE_FLOAT64_C( 606.55), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( -324.50), + SIMDE_FLOAT64_C( -332.43), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 611.77), + SIMDE_FLOAT64_C( 425.54), + UINT64_MAX }, + { SIMDE_FLOAT64_C( 910.11), + SIMDE_FLOAT64_C( 648.44), + UINT64_MAX }, + { SIMDE_FLOAT64_C( 572.56), + SIMDE_FLOAT64_C( -409.05), + UINT64_MAX }, + { SIMDE_FLOAT64_C( 265.81), + SIMDE_FLOAT64_C( -418.65), + UINT64_C( 0) } + }; + + for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint64_t r = simde_vcaled_f64(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_codegen_random_f64(-1000.0, 1000.0); + simde_float64_t b = simde_test_codegen_random_f64(-1000.0, 1000.0); + uint64_t r = simde_vcaled_f64(a, b); + + simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcale_f16 (SIMDE_MUNIT_TEST_ARGS) { + #if 1 + struct { + simde_float16 b[4]; + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 506.50), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 209.88), SIMDE_FLOAT16_VALUE( -273.25) }, + { SIMDE_FLOAT16_VALUE( -451.25), SIMDE_FLOAT16_VALUE( -948.00), SIMDE_FLOAT16_VALUE( 325.00), SIMDE_FLOAT16_VALUE( 577.50) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -715.50), SIMDE_FLOAT16_VALUE( -305.50), SIMDE_FLOAT16_VALUE( -358.25), SIMDE_FLOAT16_VALUE( 5.56) }, + { SIMDE_FLOAT16_VALUE( 466.75), SIMDE_FLOAT16_VALUE( 482.25), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 274.00) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 386.00), SIMDE_FLOAT16_VALUE( -44.34), SIMDE_FLOAT16_VALUE( -28.00), SIMDE_FLOAT16_VALUE( -189.50) }, + { SIMDE_FLOAT16_VALUE( -874.00), SIMDE_FLOAT16_VALUE( 179.12), SIMDE_FLOAT16_VALUE( 498.25), SIMDE_FLOAT16_VALUE( 26.06) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -109.38), SIMDE_FLOAT16_VALUE( -715.50), SIMDE_FLOAT16_VALUE( 598.00), SIMDE_FLOAT16_VALUE( 66.88) }, + { SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -889.50), SIMDE_FLOAT16_VALUE( -76.75), 
SIMDE_FLOAT16_VALUE( 111.31) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 691.00), SIMDE_FLOAT16_VALUE( -867.00), SIMDE_FLOAT16_VALUE( 838.00), SIMDE_FLOAT16_VALUE( -760.00) }, + { SIMDE_FLOAT16_VALUE( -815.00), SIMDE_FLOAT16_VALUE( 163.00), SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -530.50) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 857.50), SIMDE_FLOAT16_VALUE( -540.50), SIMDE_FLOAT16_VALUE( 475.25), SIMDE_FLOAT16_VALUE( 324.25) }, + { SIMDE_FLOAT16_VALUE( 941.50), SIMDE_FLOAT16_VALUE( 826.00), SIMDE_FLOAT16_VALUE( -401.75), SIMDE_FLOAT16_VALUE( 327.75) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -218.38), SIMDE_FLOAT16_VALUE( 570.00), SIMDE_FLOAT16_VALUE( -862.00), SIMDE_FLOAT16_VALUE( -92.25) }, + { SIMDE_FLOAT16_VALUE( -250.75), SIMDE_FLOAT16_VALUE( 636.50), SIMDE_FLOAT16_VALUE( 934.00), SIMDE_FLOAT16_VALUE( 640.00) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 920.50), SIMDE_FLOAT16_VALUE( 532.00), SIMDE_FLOAT16_VALUE( -293.25), SIMDE_FLOAT16_VALUE( 525.50) }, + { SIMDE_FLOAT16_VALUE( 642.50), SIMDE_FLOAT16_VALUE( 630.00), SIMDE_FLOAT16_VALUE( -363.25), SIMDE_FLOAT16_VALUE( 333.75) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_uint16x4_t r = simde_vcale_f16(a, b); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + #else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_uint16x4_t r = simde_vcale_f16(a, b); + + 
simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; + #endif +} + +static int +test_simde_vcale_f32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float32 b[2]; + simde_float32 a[2]; + uint32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 311.69), SIMDE_FLOAT32_C( -932.68) }, + { SIMDE_FLOAT32_C( 98.33), SIMDE_FLOAT32_C( -552.98) }, + { UINT32_MAX, UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 959.61), SIMDE_FLOAT32_C( 617.75) }, + { SIMDE_FLOAT32_C( -197.11), SIMDE_FLOAT32_C( 562.98) }, + { UINT32_MAX, UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 468.98), SIMDE_FLOAT32_C( -916.49) }, + { SIMDE_FLOAT32_C( 965.35), SIMDE_FLOAT32_C( 700.25) }, + { UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( -647.13), SIMDE_FLOAT32_C( -147.35) }, + { SIMDE_FLOAT32_C( -117.68), SIMDE_FLOAT32_C( -241.37) }, + { UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( -664.10), SIMDE_FLOAT32_C( -976.12) }, + { SIMDE_FLOAT32_C( 874.22), SIMDE_FLOAT32_C( -12.94) }, + { UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 25.04), SIMDE_FLOAT32_C( -125.75) }, + { SIMDE_FLOAT32_C( 212.15), SIMDE_FLOAT32_C( 782.89) }, + { UINT32_C( 0), UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 561.17), SIMDE_FLOAT32_C( 217.87) }, + { SIMDE_FLOAT32_C( -238.74), SIMDE_FLOAT32_C( 679.32) }, + { UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( -965.46), SIMDE_FLOAT32_C( -738.96) }, + { SIMDE_FLOAT32_C( -711.74), SIMDE_FLOAT32_C( 346.23) }, + { UINT32_MAX, UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_uint32x2_t r = simde_vcale_f32(a, b); + + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcale_f64 
(SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 b[1]; + simde_float64 a[1]; + uint64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 85.26) }, + { SIMDE_FLOAT64_C( 122.65) }, + { UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -500.89) }, + { SIMDE_FLOAT64_C( 936.69) }, + { UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( 594.89) }, + { SIMDE_FLOAT64_C( 788.77) }, + { UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( 543.70) }, + { SIMDE_FLOAT64_C( -150.09) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( -875.02) }, + { SIMDE_FLOAT64_C( 442.69) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 673.76) }, + { SIMDE_FLOAT64_C( 217.24) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 789.39) }, + { SIMDE_FLOAT64_C( 718.78) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( -511.44) }, + { SIMDE_FLOAT64_C( 752.01) }, + { UINT64_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + simde_uint64x1_t r = simde_vcale_f64(a, b); + + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcaleq_f16 (SIMDE_MUNIT_TEST_ARGS) { + #if 1 + struct { + simde_float16 b[8]; + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 476.50), SIMDE_FLOAT16_VALUE( 975.50), SIMDE_FLOAT16_VALUE( 915.00), SIMDE_FLOAT16_VALUE( -632.50), + SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( 317.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 622.50) }, + { SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( 444.00), SIMDE_FLOAT16_VALUE( -493.50), + SIMDE_FLOAT16_VALUE( -270.75), SIMDE_FLOAT16_VALUE( -59.31), SIMDE_FLOAT16_VALUE( -330.25), SIMDE_FLOAT16_VALUE( 212.00) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -169.50), SIMDE_FLOAT16_VALUE( -542.50), 
SIMDE_FLOAT16_VALUE( -149.88), SIMDE_FLOAT16_VALUE( -427.25), + SIMDE_FLOAT16_VALUE( -861.00), SIMDE_FLOAT16_VALUE( -977.50), SIMDE_FLOAT16_VALUE( 717.00), SIMDE_FLOAT16_VALUE( -377.75) }, + { SIMDE_FLOAT16_VALUE( -880.50), SIMDE_FLOAT16_VALUE( -855.00), SIMDE_FLOAT16_VALUE( -173.38), SIMDE_FLOAT16_VALUE( 725.50), + SIMDE_FLOAT16_VALUE( -76.69), SIMDE_FLOAT16_VALUE( -541.00), SIMDE_FLOAT16_VALUE( -72.81), SIMDE_FLOAT16_VALUE( -600.00) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -566.00), SIMDE_FLOAT16_VALUE( -157.62), SIMDE_FLOAT16_VALUE( -232.50), SIMDE_FLOAT16_VALUE( -38.72), + SIMDE_FLOAT16_VALUE( -840.50), SIMDE_FLOAT16_VALUE( -611.00), SIMDE_FLOAT16_VALUE( -416.50), SIMDE_FLOAT16_VALUE( -557.50) }, + { SIMDE_FLOAT16_VALUE( -15.61), SIMDE_FLOAT16_VALUE( -972.50), SIMDE_FLOAT16_VALUE( -50.97), SIMDE_FLOAT16_VALUE( 713.50), + SIMDE_FLOAT16_VALUE( -31.72), SIMDE_FLOAT16_VALUE( 619.00), SIMDE_FLOAT16_VALUE( -74.44), SIMDE_FLOAT16_VALUE( 799.00) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -923.50), SIMDE_FLOAT16_VALUE( 775.50), SIMDE_FLOAT16_VALUE( -628.50), SIMDE_FLOAT16_VALUE( -784.50), + SIMDE_FLOAT16_VALUE( 798.00), SIMDE_FLOAT16_VALUE( -911.50), SIMDE_FLOAT16_VALUE( -162.25), SIMDE_FLOAT16_VALUE( 917.50) }, + { SIMDE_FLOAT16_VALUE( -766.50), SIMDE_FLOAT16_VALUE( 664.50), SIMDE_FLOAT16_VALUE( 643.50), SIMDE_FLOAT16_VALUE( 157.00), + SIMDE_FLOAT16_VALUE( -877.00), SIMDE_FLOAT16_VALUE( -429.50), SIMDE_FLOAT16_VALUE( 557.00), SIMDE_FLOAT16_VALUE( -442.75) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( -675.50), SIMDE_FLOAT16_VALUE( 518.50), SIMDE_FLOAT16_VALUE( 572.50), + SIMDE_FLOAT16_VALUE( -286.75), SIMDE_FLOAT16_VALUE( -898.00), 
SIMDE_FLOAT16_VALUE( -985.00), SIMDE_FLOAT16_VALUE( 697.50) }, + { SIMDE_FLOAT16_VALUE( -870.00), SIMDE_FLOAT16_VALUE( -35.94), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( 98.06), + SIMDE_FLOAT16_VALUE( -417.00), SIMDE_FLOAT16_VALUE( -663.00), SIMDE_FLOAT16_VALUE( -103.12), SIMDE_FLOAT16_VALUE( -340.50) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -887.50), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( -125.00), SIMDE_FLOAT16_VALUE( 910.50), + SIMDE_FLOAT16_VALUE( 357.00), SIMDE_FLOAT16_VALUE( 712.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( 591.00) }, + { SIMDE_FLOAT16_VALUE( 377.00), SIMDE_FLOAT16_VALUE( 471.75), SIMDE_FLOAT16_VALUE( -252.25), SIMDE_FLOAT16_VALUE( 500.25), + SIMDE_FLOAT16_VALUE( -958.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( -942.50), SIMDE_FLOAT16_VALUE( 455.25) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -371.00), SIMDE_FLOAT16_VALUE( 576.00), SIMDE_FLOAT16_VALUE( 27.72), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 678.50), SIMDE_FLOAT16_VALUE( 42.72), SIMDE_FLOAT16_VALUE( 40.00), SIMDE_FLOAT16_VALUE( 808.00) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( -548.50), SIMDE_FLOAT16_VALUE( -93.81), SIMDE_FLOAT16_VALUE( -410.25), + SIMDE_FLOAT16_VALUE( -211.88), SIMDE_FLOAT16_VALUE( 803.00), SIMDE_FLOAT16_VALUE( 249.12), SIMDE_FLOAT16_VALUE( -99.44) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 71.50), SIMDE_FLOAT16_VALUE( -876.00), SIMDE_FLOAT16_VALUE( -188.88), SIMDE_FLOAT16_VALUE( -571.50), + SIMDE_FLOAT16_VALUE( 837.00), SIMDE_FLOAT16_VALUE( -360.50), SIMDE_FLOAT16_VALUE( -980.50), SIMDE_FLOAT16_VALUE( 213.88) }, + { SIMDE_FLOAT16_VALUE( -889.00), SIMDE_FLOAT16_VALUE( -233.00), SIMDE_FLOAT16_VALUE( 
-285.75), SIMDE_FLOAT16_VALUE( -846.50), + SIMDE_FLOAT16_VALUE( 71.56), SIMDE_FLOAT16_VALUE( -228.25), SIMDE_FLOAT16_VALUE( 608.50), SIMDE_FLOAT16_VALUE( 700.50) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_uint16x8_t r = simde_vcaleq_f16(a, b); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + #else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_uint16x8_t r = simde_vcaleq_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; + #endif +} + +static int +test_simde_vcaleq_f32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float32 b[4]; + simde_float32 a[4]; + uint32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 880.25), SIMDE_FLOAT32_C( 497.37), SIMDE_FLOAT32_C( 188.18), SIMDE_FLOAT32_C( -214.92) }, + { SIMDE_FLOAT32_C( -292.63), SIMDE_FLOAT32_C( 165.21), SIMDE_FLOAT32_C( -507.32), SIMDE_FLOAT32_C( -554.07) }, + { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 21.58), SIMDE_FLOAT32_C( -187.66), SIMDE_FLOAT32_C( 52.34), SIMDE_FLOAT32_C( 522.72) }, + { SIMDE_FLOAT32_C( 805.10), SIMDE_FLOAT32_C( -357.26), SIMDE_FLOAT32_C( 451.59), SIMDE_FLOAT32_C( 744.08) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 113.67), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( 489.01), SIMDE_FLOAT32_C( 347.72) }, + { SIMDE_FLOAT32_C( -991.50), SIMDE_FLOAT32_C( 
-625.74), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( 848.94) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 90.46), SIMDE_FLOAT32_C( 858.14), SIMDE_FLOAT32_C( -123.29), SIMDE_FLOAT32_C( -917.86) }, + { SIMDE_FLOAT32_C( -788.14), SIMDE_FLOAT32_C( 739.22), SIMDE_FLOAT32_C( 572.18), SIMDE_FLOAT32_C( -907.90) }, + { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 236.59), SIMDE_FLOAT32_C( -239.64), SIMDE_FLOAT32_C( -122.81), SIMDE_FLOAT32_C( 943.97) }, + { SIMDE_FLOAT32_C( 925.57), SIMDE_FLOAT32_C( 369.86), SIMDE_FLOAT32_C( -610.11), SIMDE_FLOAT32_C( -52.85) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( -817.80), SIMDE_FLOAT32_C( 442.23), SIMDE_FLOAT32_C( -530.12), SIMDE_FLOAT32_C( 987.30) }, + { SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( 921.46), SIMDE_FLOAT32_C( 731.38), SIMDE_FLOAT32_C( 198.64) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 256.18), SIMDE_FLOAT32_C( 220.39), SIMDE_FLOAT32_C( -453.64), SIMDE_FLOAT32_C( 264.67) }, + { SIMDE_FLOAT32_C( 594.64), SIMDE_FLOAT32_C( 189.87), SIMDE_FLOAT32_C( 113.62), SIMDE_FLOAT32_C( -314.89) }, + { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 48.01), SIMDE_FLOAT32_C( 990.32), SIMDE_FLOAT32_C( -232.76), SIMDE_FLOAT32_C( 259.86) }, + { SIMDE_FLOAT32_C( 729.55), SIMDE_FLOAT32_C( -660.58), SIMDE_FLOAT32_C( 351.97), SIMDE_FLOAT32_C( -33.86) }, + { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_uint32x4_t r = simde_vcaleq_f32(a, b); + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcaleq_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 b[2]; + 
simde_float64 a[2]; + uint64_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 230.31), SIMDE_FLOAT64_C( -618.28) }, + { SIMDE_FLOAT64_C( 180.85), SIMDE_FLOAT64_C( 444.53) }, + { UINT64_MAX, UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 217.53), SIMDE_FLOAT64_C( -615.67) }, + { SIMDE_FLOAT64_C( 629.35), SIMDE_FLOAT64_C( -484.75) }, + { UINT64_C( 0), UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 170.44), SIMDE_FLOAT64_C( -454.09) }, + { SIMDE_FLOAT64_C( 330.58), SIMDE_FLOAT64_C( 520.13) }, + { UINT64_C( 0), UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -764.76), SIMDE_FLOAT64_C( -650.22) }, + { SIMDE_FLOAT64_C( -78.50), SIMDE_FLOAT64_C( 683.38) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -812.10), SIMDE_FLOAT64_C( 401.95) }, + { SIMDE_FLOAT64_C( -416.07), SIMDE_FLOAT64_C( 983.29) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -496.16), SIMDE_FLOAT64_C( 249.85) }, + { SIMDE_FLOAT64_C( 57.13), SIMDE_FLOAT64_C( -909.73) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -537.53), SIMDE_FLOAT64_C( 707.06) }, + { SIMDE_FLOAT64_C( -45.84), SIMDE_FLOAT64_C( -807.07) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -27.41), SIMDE_FLOAT64_C( 231.88) }, + { SIMDE_FLOAT64_C( -442.67), SIMDE_FLOAT64_C( -797.10) }, + { UINT64_C( 0), UINT64_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_uint64x2_t r = simde_vcaleq_f64(a, b); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcaleh_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcales_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaled_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcale_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcale_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcale_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcaleq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaleq_f32) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vcaleq_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/calt.c b/test/arm/neon/calt.c new file mode 100644 index 000000000..32f7177e0 --- /dev/null +++ b/test/arm/neon/calt.c @@ -0,0 +1,521 @@ +#define SIMDE_TEST_ARM_NEON_INSN calt + +#include "test-neon.h" +#include "../../../simde/arm/neon/calt.h" + +static int +test_simde_vcalth_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 b; + simde_float16 a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( -774.00), + SIMDE_FLOAT16_VALUE( 279.00), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -933.00), + SIMDE_FLOAT16_VALUE( 505.00), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( 510.00), + SIMDE_FLOAT16_VALUE( 91.44), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -980.50), + SIMDE_FLOAT16_VALUE( 217.50), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( 716.50), + SIMDE_FLOAT16_VALUE( 903.00), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 875.00), + SIMDE_FLOAT16_VALUE( -717.50), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -9.65), + SIMDE_FLOAT16_VALUE( 45.19), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -580.00), + SIMDE_FLOAT16_VALUE( 148.88), + UINT16_MAX } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vcalth_f16(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_codegen_random_f16(-1000.0f, 1000.0f); + simde_float16_t b = simde_test_codegen_random_f16(-1000.0f, 1000.0f); + uint16_t r = simde_vcalth_f16(a, b); + + simde_test_codegen_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcalts_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 b; + simde_float32 a; + 
uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_NANS) + { SIMDE_MATH_NANF, + SIMDE_FLOAT32_C( 0.52), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 705.02), + SIMDE_MATH_NANF, + UINT32_C( 0) }, + { SIMDE_MATH_NANF, + SIMDE_MATH_NANF, + UINT32_C( 0) }, + #endif + + { SIMDE_FLOAT32_C( 8.79), + SIMDE_FLOAT32_C( 792.83), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( -399.97), + SIMDE_FLOAT32_C( -256.84), + UINT32_MAX }, + { SIMDE_FLOAT32_C( 231.75), + SIMDE_FLOAT32_C( -411.54), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 864.59), + SIMDE_FLOAT32_C( -881.95), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( -814.20), + SIMDE_FLOAT32_C( 479.81), + UINT32_MAX }, + { SIMDE_FLOAT32_C( 263.32), + SIMDE_FLOAT32_C( -797.51), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 321.47), + SIMDE_FLOAT32_C( -74.97), + UINT32_MAX }, + { SIMDE_FLOAT32_C( -57.92), + SIMDE_FLOAT32_C( 535.57), + UINT32_C( 0) } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint32_t r = simde_vcalts_f32(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); + simde_float32_t b = simde_test_codegen_random_f32(-1000.0f, 1000.0f); + uint32_t r = simde_vcalts_f32(a, b); + + simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaltd_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 b; + simde_float64 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_NANS) + { SIMDE_MATH_NAN, + SIMDE_FLOAT64_C( 0.52), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 705.02), + SIMDE_MATH_NAN, + UINT64_C( 0) }, + { SIMDE_MATH_NAN, + SIMDE_MATH_NAN, + UINT64_C( 0) }, + #endif + + { SIMDE_FLOAT64_C( -111.66), + SIMDE_FLOAT64_C( 
-149.68), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( -365.17), + SIMDE_FLOAT64_C( -219.70), + UINT64_MAX }, + { SIMDE_FLOAT64_C( -45.32), + SIMDE_FLOAT64_C( 606.55), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( -324.50), + SIMDE_FLOAT64_C( -332.43), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 611.77), + SIMDE_FLOAT64_C( 425.54), + UINT64_MAX }, + { SIMDE_FLOAT64_C( 910.11), + SIMDE_FLOAT64_C( 648.44), + UINT64_MAX }, + { SIMDE_FLOAT64_C( 572.56), + SIMDE_FLOAT64_C( -409.05), + UINT64_MAX }, + { SIMDE_FLOAT64_C( 265.81), + SIMDE_FLOAT64_C( -418.65), + UINT64_C( 0) } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint64_t r = simde_vcaltd_f64(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_codegen_random_f64(-1000.0, 1000.0); + simde_float64_t b = simde_test_codegen_random_f64(-1000.0, 1000.0); + uint64_t r = simde_vcaltd_f64(a, b); + + simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcalt_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 b[4]; + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 930.50), SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( -125.12), SIMDE_FLOAT16_VALUE( 783.00) }, + { SIMDE_FLOAT16_VALUE( 402.50), SIMDE_FLOAT16_VALUE( -327.25), SIMDE_FLOAT16_VALUE( 405.25), SIMDE_FLOAT16_VALUE( -207.75) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -29.70), SIMDE_FLOAT16_VALUE( 210.88), SIMDE_FLOAT16_VALUE( -861.00), SIMDE_FLOAT16_VALUE( -614.50) }, + { SIMDE_FLOAT16_VALUE( -248.62), SIMDE_FLOAT16_VALUE( 342.00), SIMDE_FLOAT16_VALUE( -816.50), SIMDE_FLOAT16_VALUE( -39.50) }, + { UINT16_C( 0), UINT16_C( 0), 
UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 419.75), SIMDE_FLOAT16_VALUE( -664.50), SIMDE_FLOAT16_VALUE( -289.75), SIMDE_FLOAT16_VALUE( 396.25) }, + { SIMDE_FLOAT16_VALUE( -934.50), SIMDE_FLOAT16_VALUE( -18.20), SIMDE_FLOAT16_VALUE( 855.00), SIMDE_FLOAT16_VALUE( 748.50) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 145.12), SIMDE_FLOAT16_VALUE( -781.50), SIMDE_FLOAT16_VALUE( -379.50), SIMDE_FLOAT16_VALUE( 23.91) }, + { SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( 763.50), SIMDE_FLOAT16_VALUE( -35.88), SIMDE_FLOAT16_VALUE( 784.50) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -940.00), SIMDE_FLOAT16_VALUE( 839.00), SIMDE_FLOAT16_VALUE( 568.00), SIMDE_FLOAT16_VALUE( 462.25) }, + { SIMDE_FLOAT16_VALUE( -488.25), SIMDE_FLOAT16_VALUE( -26.98), SIMDE_FLOAT16_VALUE( -745.50), SIMDE_FLOAT16_VALUE( 482.00) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -816.00), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( 867.50), SIMDE_FLOAT16_VALUE( -64.75) }, + { SIMDE_FLOAT16_VALUE( 735.50), SIMDE_FLOAT16_VALUE( -949.00), SIMDE_FLOAT16_VALUE( 895.50), SIMDE_FLOAT16_VALUE( 155.25) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -613.50), SIMDE_FLOAT16_VALUE( -394.00), SIMDE_FLOAT16_VALUE( -448.50), SIMDE_FLOAT16_VALUE( -548.00) }, + { SIMDE_FLOAT16_VALUE( 587.50), SIMDE_FLOAT16_VALUE( -593.50), SIMDE_FLOAT16_VALUE( -799.00), SIMDE_FLOAT16_VALUE( -267.00) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -375.00), SIMDE_FLOAT16_VALUE( -178.62), SIMDE_FLOAT16_VALUE( 757.00), SIMDE_FLOAT16_VALUE( -521.00) }, + { SIMDE_FLOAT16_VALUE( -415.25), SIMDE_FLOAT16_VALUE( -279.00), SIMDE_FLOAT16_VALUE( -736.00), SIMDE_FLOAT16_VALUE( -355.25) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_uint16x4_t r = simde_vld1_u16(test_vec[i].r); + simde_uint16x4_t r_ = simde_vcalt_f16(a, b); + + simde_test_arm_neon_assert_equal_u16x4(r_, r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_uint16x4_t r = simde_vcalt_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcalt_f32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float32 b[2]; + simde_float32 a[2]; + uint32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 311.69), SIMDE_FLOAT32_C( -932.68) }, + { SIMDE_FLOAT32_C( 98.33), SIMDE_FLOAT32_C( -552.98) }, + { UINT32_MAX, UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 959.61), SIMDE_FLOAT32_C( 617.75) }, + { SIMDE_FLOAT32_C( -197.11), SIMDE_FLOAT32_C( 562.98) }, + { UINT32_MAX, UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 468.98), SIMDE_FLOAT32_C( -916.49) }, + { SIMDE_FLOAT32_C( 965.35), SIMDE_FLOAT32_C( 700.25) }, + { UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( -647.13), SIMDE_FLOAT32_C( -147.35) }, + { SIMDE_FLOAT32_C( -117.68), SIMDE_FLOAT32_C( -241.37) }, + { UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( -664.10), SIMDE_FLOAT32_C( -976.12) }, + { SIMDE_FLOAT32_C( 874.22), SIMDE_FLOAT32_C( -12.94) }, + { UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 25.04), SIMDE_FLOAT32_C( -125.75) }, + { SIMDE_FLOAT32_C( 212.15), SIMDE_FLOAT32_C( 782.89) }, + { UINT32_C( 0), UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 561.17), SIMDE_FLOAT32_C( 217.87) }, + { SIMDE_FLOAT32_C( -238.74), SIMDE_FLOAT32_C( 679.32) }, + { 
UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( -965.46), SIMDE_FLOAT32_C( -738.96) }, + { SIMDE_FLOAT32_C( -711.74), SIMDE_FLOAT32_C( 346.23) }, + { UINT32_MAX, UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_uint32x2_t r = simde_vcalt_f32(a, b); + + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcalt_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 b[1]; + simde_float64 a[1]; + uint64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 85.26) }, + { SIMDE_FLOAT64_C( 122.65) }, + { UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -500.89) }, + { SIMDE_FLOAT64_C( 936.69) }, + { UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( 594.89) }, + { SIMDE_FLOAT64_C( 788.77) }, + { UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( 543.70) }, + { SIMDE_FLOAT64_C( -150.09) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( -875.02) }, + { SIMDE_FLOAT64_C( 442.69) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 673.76) }, + { SIMDE_FLOAT64_C( 217.24) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 789.39) }, + { SIMDE_FLOAT64_C( 718.78) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( -511.44) }, + { SIMDE_FLOAT64_C( 752.01) }, + { UINT64_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + simde_uint64x1_t r = simde_vcalt_f64(a, b); + + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcaltq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 b[8]; + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 131.50), SIMDE_FLOAT16_VALUE( -289.00), SIMDE_FLOAT16_VALUE( -100.88), SIMDE_FLOAT16_VALUE( 
-881.00), + SIMDE_FLOAT16_VALUE( -149.50), SIMDE_FLOAT16_VALUE( 558.00), SIMDE_FLOAT16_VALUE( 800.50), SIMDE_FLOAT16_VALUE( -454.00) }, + { SIMDE_FLOAT16_VALUE( 227.38), SIMDE_FLOAT16_VALUE( 969.00), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -672.50), + SIMDE_FLOAT16_VALUE( -452.25), SIMDE_FLOAT16_VALUE( -720.50), SIMDE_FLOAT16_VALUE( 609.00), SIMDE_FLOAT16_VALUE( -97.19) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 400.00), SIMDE_FLOAT16_VALUE( -230.38), SIMDE_FLOAT16_VALUE( -477.50), SIMDE_FLOAT16_VALUE( 924.00), + SIMDE_FLOAT16_VALUE( -85.00), SIMDE_FLOAT16_VALUE( -74.06), SIMDE_FLOAT16_VALUE( -465.50), SIMDE_FLOAT16_VALUE( -573.50) }, + { SIMDE_FLOAT16_VALUE( -854.00), SIMDE_FLOAT16_VALUE( 866.00), SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -426.00), + SIMDE_FLOAT16_VALUE( 380.00), SIMDE_FLOAT16_VALUE( -691.00), SIMDE_FLOAT16_VALUE( 747.50), SIMDE_FLOAT16_VALUE( -488.50) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 19.80), SIMDE_FLOAT16_VALUE( -353.25), SIMDE_FLOAT16_VALUE( -369.25), SIMDE_FLOAT16_VALUE( 870.50), + SIMDE_FLOAT16_VALUE( -795.50), SIMDE_FLOAT16_VALUE( -569.00), SIMDE_FLOAT16_VALUE( -584.00), SIMDE_FLOAT16_VALUE( 432.00) }, + { SIMDE_FLOAT16_VALUE( -600.00), SIMDE_FLOAT16_VALUE( -755.00), SIMDE_FLOAT16_VALUE( 759.50), SIMDE_FLOAT16_VALUE( -52.28), + SIMDE_FLOAT16_VALUE( -475.25), SIMDE_FLOAT16_VALUE( 368.25), SIMDE_FLOAT16_VALUE( 850.50), SIMDE_FLOAT16_VALUE( 924.50) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -862.00), SIMDE_FLOAT16_VALUE( -627.00), SIMDE_FLOAT16_VALUE( 848.50), SIMDE_FLOAT16_VALUE( 52.91), + SIMDE_FLOAT16_VALUE( 299.00), SIMDE_FLOAT16_VALUE( -617.00), SIMDE_FLOAT16_VALUE( 479.50), SIMDE_FLOAT16_VALUE( 445.25) }, + { 
SIMDE_FLOAT16_VALUE( -751.00), SIMDE_FLOAT16_VALUE( 753.50), SIMDE_FLOAT16_VALUE( -981.00), SIMDE_FLOAT16_VALUE( 629.00), + SIMDE_FLOAT16_VALUE( -937.50), SIMDE_FLOAT16_VALUE( 766.50), SIMDE_FLOAT16_VALUE( -859.50), SIMDE_FLOAT16_VALUE( 82.19) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -586.50), SIMDE_FLOAT16_VALUE( -229.00), SIMDE_FLOAT16_VALUE( -47.53), SIMDE_FLOAT16_VALUE( -382.00), + SIMDE_FLOAT16_VALUE( 202.12), SIMDE_FLOAT16_VALUE( 368.75), SIMDE_FLOAT16_VALUE( -950.00), SIMDE_FLOAT16_VALUE( 602.00) }, + { SIMDE_FLOAT16_VALUE( 613.50), SIMDE_FLOAT16_VALUE( 809.50), SIMDE_FLOAT16_VALUE( -450.00), SIMDE_FLOAT16_VALUE( -861.50), + SIMDE_FLOAT16_VALUE( 177.62), SIMDE_FLOAT16_VALUE( -599.50), SIMDE_FLOAT16_VALUE( -937.00), SIMDE_FLOAT16_VALUE( 315.50) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -226.62), SIMDE_FLOAT16_VALUE( 911.50), SIMDE_FLOAT16_VALUE( -631.50), SIMDE_FLOAT16_VALUE( -927.50), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( 848.00), SIMDE_FLOAT16_VALUE( 517.50), SIMDE_FLOAT16_VALUE( -456.50) }, + { SIMDE_FLOAT16_VALUE( 601.50), SIMDE_FLOAT16_VALUE( 536.50), SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 664.00), + SIMDE_FLOAT16_VALUE( 303.25), SIMDE_FLOAT16_VALUE( -687.50), SIMDE_FLOAT16_VALUE( -253.88), SIMDE_FLOAT16_VALUE( 717.00) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 83.62), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -714.50), + SIMDE_FLOAT16_VALUE( 67.25), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( 888.00), SIMDE_FLOAT16_VALUE( -319.25) }, + { SIMDE_FLOAT16_VALUE( -806.00), SIMDE_FLOAT16_VALUE( -562.00), SIMDE_FLOAT16_VALUE( -180.88), SIMDE_FLOAT16_VALUE( 371.75), + 
SIMDE_FLOAT16_VALUE( -161.75), SIMDE_FLOAT16_VALUE( -117.88), SIMDE_FLOAT16_VALUE( -312.50), SIMDE_FLOAT16_VALUE( 611.50) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -206.38), SIMDE_FLOAT16_VALUE( 55.94), SIMDE_FLOAT16_VALUE( 684.00), SIMDE_FLOAT16_VALUE( 88.25), + SIMDE_FLOAT16_VALUE( -96.19), SIMDE_FLOAT16_VALUE( 201.50), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( -494.75) }, + { SIMDE_FLOAT16_VALUE( -261.75), SIMDE_FLOAT16_VALUE( 804.00), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( -958.50), + SIMDE_FLOAT16_VALUE( -883.50), SIMDE_FLOAT16_VALUE( -84.69), SIMDE_FLOAT16_VALUE( 758.50), SIMDE_FLOAT16_VALUE( 200.25) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_uint16x8_t r = simde_vld1q_u16(test_vec[i].r); + simde_uint16x8_t r_ = simde_vcaltq_f16(a, b); + + simde_test_arm_neon_assert_equal_u16x8(r_, r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_uint16x8_t r = simde_vcaltq_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaltq_f32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float32 b[4]; + simde_float32 a[4]; + uint32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 880.25), SIMDE_FLOAT32_C( 497.37), SIMDE_FLOAT32_C( 188.18), SIMDE_FLOAT32_C( -214.92) }, + { 
SIMDE_FLOAT32_C( -292.63), SIMDE_FLOAT32_C( 165.21), SIMDE_FLOAT32_C( -507.32), SIMDE_FLOAT32_C( -554.07) }, + { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 21.58), SIMDE_FLOAT32_C( -187.66), SIMDE_FLOAT32_C( 52.34), SIMDE_FLOAT32_C( 522.72) }, + { SIMDE_FLOAT32_C( 805.10), SIMDE_FLOAT32_C( -357.26), SIMDE_FLOAT32_C( 451.59), SIMDE_FLOAT32_C( 744.08) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 113.67), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( 489.01), SIMDE_FLOAT32_C( 347.72) }, + { SIMDE_FLOAT32_C( -991.50), SIMDE_FLOAT32_C( -625.74), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( 848.94) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 90.46), SIMDE_FLOAT32_C( 858.14), SIMDE_FLOAT32_C( -123.29), SIMDE_FLOAT32_C( -917.86) }, + { SIMDE_FLOAT32_C( -788.14), SIMDE_FLOAT32_C( 739.22), SIMDE_FLOAT32_C( 572.18), SIMDE_FLOAT32_C( -907.90) }, + { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 236.59), SIMDE_FLOAT32_C( -239.64), SIMDE_FLOAT32_C( -122.81), SIMDE_FLOAT32_C( 943.97) }, + { SIMDE_FLOAT32_C( 925.57), SIMDE_FLOAT32_C( 369.86), SIMDE_FLOAT32_C( -610.11), SIMDE_FLOAT32_C( -52.85) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( -817.80), SIMDE_FLOAT32_C( 442.23), SIMDE_FLOAT32_C( -530.12), SIMDE_FLOAT32_C( 987.30) }, + { SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( 921.46), SIMDE_FLOAT32_C( 731.38), SIMDE_FLOAT32_C( 198.64) }, + { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 256.18), SIMDE_FLOAT32_C( 220.39), SIMDE_FLOAT32_C( -453.64), SIMDE_FLOAT32_C( 264.67) }, + { SIMDE_FLOAT32_C( 594.64), SIMDE_FLOAT32_C( 189.87), SIMDE_FLOAT32_C( 113.62), SIMDE_FLOAT32_C( -314.89) }, + { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, + { { SIMDE_FLOAT32_C( 48.01), SIMDE_FLOAT32_C( 990.32), SIMDE_FLOAT32_C( -232.76), SIMDE_FLOAT32_C( 259.86) }, + 
{ SIMDE_FLOAT32_C( 729.55), SIMDE_FLOAT32_C( -660.58), SIMDE_FLOAT32_C( 351.97), SIMDE_FLOAT32_C( -33.86) }, + { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_uint32x4_t r = simde_vcaltq_f32(a, b); + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcaltq_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 b[2]; + simde_float64 a[2]; + uint64_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 230.31), SIMDE_FLOAT64_C( -618.28) }, + { SIMDE_FLOAT64_C( 180.85), SIMDE_FLOAT64_C( 444.53) }, + { UINT64_MAX, UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 217.53), SIMDE_FLOAT64_C( -615.67) }, + { SIMDE_FLOAT64_C( 629.35), SIMDE_FLOAT64_C( -484.75) }, + { UINT64_C( 0), UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 170.44), SIMDE_FLOAT64_C( -454.09) }, + { SIMDE_FLOAT64_C( 330.58), SIMDE_FLOAT64_C( 520.13) }, + { UINT64_C( 0), UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -764.76), SIMDE_FLOAT64_C( -650.22) }, + { SIMDE_FLOAT64_C( -78.50), SIMDE_FLOAT64_C( 683.38) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -812.10), SIMDE_FLOAT64_C( 401.95) }, + { SIMDE_FLOAT64_C( -416.07), SIMDE_FLOAT64_C( 983.29) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -496.16), SIMDE_FLOAT64_C( 249.85) }, + { SIMDE_FLOAT64_C( 57.13), SIMDE_FLOAT64_C( -909.73) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -537.53), SIMDE_FLOAT64_C( 707.06) }, + { SIMDE_FLOAT64_C( -45.84), SIMDE_FLOAT64_C( -807.07) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_FLOAT64_C( -27.41), SIMDE_FLOAT64_C( 231.88) }, + { SIMDE_FLOAT64_C( -442.67), SIMDE_FLOAT64_C( -797.10) }, + { UINT64_C( 0), UINT64_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a 
= simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_uint64x2_t r = simde_vcaltq_f64(a, b); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcalth_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcalts_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaltd_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcalt_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcalt_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcalt_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcaltq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaltq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaltq_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/create.c b/test/arm/neon/create.c index f62416a0a..2edbeaa0a 100644 --- a/test/arm/neon/create.c +++ b/test/arm/neon/create.c @@ -365,6 +365,39 @@ test_simde_vcreate_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcreate_f16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a; + uint64_t r[1]; + } test_vec[] = { + { UINT64_C(14121171741695809290), + { UINT64_C(14121171741695809290) } }, + { UINT64_C(14036155041624243569), + { UINT64_C(14036155041624243569) } }, + { UINT64_C( 4933549844381694689), + { UINT64_C( 4933549844381694689) } }, + { UINT64_C( 4721956654897595679), + { UINT64_C( 4721956654897595679) } }, + { UINT64_C( 4794791124775457096), + { UINT64_C( 4794791124775457096) } }, + { UINT64_C(14142503323827870106), + { UINT64_C(14142503323827870106) } }, + { UINT64_C(14093049577892322181), + { UINT64_C(14093049577892322181) } }, + { UINT64_C( 4932300799177322332), + { UINT64_C( 4932300799177322332) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r = simde_vcreate_f16(test_vec[i].a); + simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_f16(r), simde_vld1_u64(test_vec[i].r)); + } + + return 0; +} + + static int test_simde_vcreate_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 
@@ -466,6 +499,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/cvt.c b/test/arm/neon/cvt.c index 1fd75a297..6b20d842d 100644 --- a/test/arm/neon/cvt.c +++ b/test/arm/neon/cvt.c @@ -3,6 +3,74 @@ #include "test-neon.h" #include "../../../simde/arm/neon/cvt.h" +static int +test_simde_vcvth_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + int16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( -0.604), + INT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 24.671), + INT16_C( 24) }, + { SIMDE_FLOAT16_VALUE( -23.744), + -INT16_C( 23) }, + { SIMDE_FLOAT16_VALUE( -7.939), + -INT16_C( 7) }, + { SIMDE_FLOAT16_VALUE( -18.393), + -INT16_C( 18) }, + { SIMDE_FLOAT16_VALUE( 29.124), + INT16_C( 29) }, + { SIMDE_FLOAT16_VALUE( 26.359), + INT16_C( 26) }, + { SIMDE_FLOAT16_VALUE( 19.447), + INT16_C( 19) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + int16_t r = simde_vcvth_s16_f16(a); + + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvth_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 25.639), + UINT16_C( 25) }, + { SIMDE_FLOAT16_VALUE( -25.081), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 15.061), + UINT16_C( 15) }, + { SIMDE_FLOAT16_VALUE( -21.777), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -26.635), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -9.047), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -27.803), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 3.276), + UINT16_C( 3) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + uint16_t r = simde_vcvth_u16_f16(a); + + 
simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; +} + static int test_simde_vcvts_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1566,22 +1634,26 @@ test_simde_vcvtas_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(1000.0), INT32_MIN }, #endif - { SIMDE_FLOAT32_C( 550.19), - INT32_C( 550) }, - { SIMDE_FLOAT32_C( -14.71), - -INT32_C( 15) }, - { SIMDE_FLOAT32_C( 735.91), - INT32_C( 736) }, - { SIMDE_FLOAT32_C( 355.60), - INT32_C( 356) }, - { SIMDE_FLOAT32_C( -850.41), - -INT32_C( 850) }, - { SIMDE_FLOAT32_C( -934.68), - -INT32_C( 935) }, - { SIMDE_FLOAT32_C( -125.50), - -INT32_C( 126) }, - { SIMDE_FLOAT32_C( 784.50), - INT32_C( 785) } + { SIMDE_FLOAT32_C(-55.5), + -INT32_C(56) }, + { SIMDE_FLOAT32_C(55.5), + INT32_C(56) }, + { SIMDE_FLOAT32_C(-755.699707), + -INT32_C(756) }, + { SIMDE_FLOAT32_C(-479.408081), + -INT32_C(479) }, + { SIMDE_FLOAT32_C(-192.237427), + -INT32_C(192) }, + { SIMDE_FLOAT32_C(92.246948), + INT32_C(92) }, + { SIMDE_FLOAT32_C(-620.131226), + -INT32_C(620) }, + { SIMDE_FLOAT32_C(658.543213), + INT32_C(659) }, + { SIMDE_FLOAT32_C(-58.790283), + -INT32_C(59) }, + { SIMDE_FLOAT32_C(-777.055359), + -INT32_C(777) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1605,23 +1677,25 @@ test_simde_vcvtas_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { UINT32_C( 0) }, { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), UINT32_MAX }, + { SIMDE_MATH_INFINITYF, + UINT32_MAX }, #endif - { SIMDE_FLOAT32_C( 550.19), - UINT32_C( 550) }, - { SIMDE_FLOAT32_C( -14.71), - UINT32_C( 0) }, - { SIMDE_FLOAT32_C( 735.91), - UINT32_C( 736) }, - { SIMDE_FLOAT32_C( 355.60), - UINT32_C( 356) }, - { SIMDE_FLOAT32_C( -850.41), - UINT32_C( 0) }, - { SIMDE_FLOAT32_C( -934.68), - UINT32_C( 0) }, - { SIMDE_FLOAT32_C( -125.28), - UINT32_C( 0) }, - { SIMDE_FLOAT32_C( 784.80), - UINT32_C( 785) } + { SIMDE_FLOAT32_C(238.269043), + UINT32_C(238) }, + { SIMDE_FLOAT32_C(884.073364), + 
UINT32_C(884) }, + { SIMDE_FLOAT32_C(517.341492), + UINT32_C(517) }, + { SIMDE_FLOAT32_C(161.270676), + UINT32_C(161) }, + { SIMDE_FLOAT32_C(302.139801), + UINT32_C(302) }, + { SIMDE_FLOAT32_C(949.265381), + UINT32_C(949) }, + { SIMDE_FLOAT32_C(586.265320), + UINT32_C(586) }, + { SIMDE_FLOAT32_C(230.019547), + UINT32_C(230) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1647,22 +1721,22 @@ test_simde_vcvta_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { { { SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, { INT32_C( 0), INT32_MAX } }, #endif - { { SIMDE_FLOAT32_C( 396.3), SIMDE_FLOAT32_C( -246.90) }, - { INT32_C( 396), -INT32_C( 247) } }, - { { SIMDE_FLOAT32_C( 241.51), SIMDE_FLOAT32_C( 602.56) }, - { INT32_C( 242), INT32_C( 603) } }, - { { SIMDE_FLOAT32_C( -106.85), SIMDE_FLOAT32_C( -566.67) }, - { -INT32_C( 107), -INT32_C( 567) } }, - { { SIMDE_FLOAT32_C( 463.44), SIMDE_FLOAT32_C( 539.86) }, - { INT32_C( 463), INT32_C( 540) } }, - { { SIMDE_FLOAT32_C( -550.41), SIMDE_FLOAT32_C( 982.91) }, - { -INT32_C( 550), INT32_C( 983) } }, - { { SIMDE_FLOAT32_C( 499.92), SIMDE_FLOAT32_C( -727.55) }, - { INT32_C( 500), -INT32_C( 728) } }, - { { SIMDE_FLOAT32_C( -713.41), SIMDE_FLOAT32_C( 713.10) }, - { -INT32_C( 713), INT32_C( 713) } }, - { { SIMDE_FLOAT32_C( -998.69), SIMDE_FLOAT32_C( -409.99) }, - { -INT32_C( 999), -INT32_C( 410) } } + { { SIMDE_FLOAT32_C(-137.097046), SIMDE_FLOAT32_C(632.638672) }, + { -INT32_C(137), INT32_C(633) } }, + { { SIMDE_FLOAT32_C(135.947388), SIMDE_FLOAT32_C(-204.564087) }, + { INT32_C(136), -INT32_C(205) } }, + { { SIMDE_FLOAT32_C(422.245239), SIMDE_FLOAT32_C(972.902710) }, + { INT32_C(422), INT32_C(973) } }, + { { SIMDE_FLOAT32_C(-291.536621), SIMDE_FLOAT32_C(-849.554077) }, + { -INT32_C(292), -INT32_C(850) } }, + { { SIMDE_FLOAT32_C(-9.575623), SIMDE_FLOAT32_C(318.716919) }, + { -INT32_C(10), INT32_C(319) } }, + { { SIMDE_FLOAT32_C(-734.776367), SIMDE_FLOAT32_C(-510.679810) }, + { -INT32_C(735), -INT32_C(511) } }, + { { 
SIMDE_FLOAT32_C(-457.886719), SIMDE_FLOAT32_C(655.444580) }, + { -INT32_C(458), INT32_C(655) } }, + { { SIMDE_FLOAT32_C(847.546021), SIMDE_FLOAT32_C(849.980591) }, + { INT32_C(848), INT32_C(850) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1688,22 +1762,22 @@ test_simde_vcvta_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { { { SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, { UINT32_C( 0), UINT32_MAX } }, #endif - { { SIMDE_FLOAT32_C( 396.3), SIMDE_FLOAT32_C( -246.90) }, - { UINT32_C( 396), UINT32_C( 0) } }, - { { SIMDE_FLOAT32_C( 241.51), SIMDE_FLOAT32_C( 602.56) }, - { UINT32_C( 242), UINT32_C( 603) } }, - { { SIMDE_FLOAT32_C( -106.85), SIMDE_FLOAT32_C( -566.67) }, - { UINT32_C( 0), UINT32_C( 0) } }, - { { SIMDE_FLOAT32_C( 463.44), SIMDE_FLOAT32_C( 539.86) }, - { UINT32_C( 463), UINT32_C( 540) } }, - { { SIMDE_FLOAT32_C( -550.41), SIMDE_FLOAT32_C( 982.91) }, - { UINT32_C( 0), UINT32_C( 983) } }, - { { SIMDE_FLOAT32_C( 499.92), SIMDE_FLOAT32_C( -727.55) }, - { UINT32_C( 500), UINT32_C( 0) } }, - { { SIMDE_FLOAT32_C( -713.41), SIMDE_FLOAT32_C( 713.10) }, - { UINT32_C( 0), UINT32_C( 713) } }, - { { SIMDE_FLOAT32_C( -998.69), SIMDE_FLOAT32_C( -409.99) }, - { UINT32_C( 0), UINT32_C( 0) } } + { { SIMDE_FLOAT32_C(518.760376), SIMDE_FLOAT32_C(796.769409) }, + { UINT32_C(519), UINT32_C(797) } }, + { { SIMDE_FLOAT32_C(161.204361), SIMDE_FLOAT32_C(381.395020) }, + { UINT32_C(161), UINT32_C(381) } }, + { { SIMDE_FLOAT32_C(803.856689), SIMDE_FLOAT32_C(971.859131) }, + { UINT32_C(804), UINT32_C(972) } }, + { { SIMDE_FLOAT32_C(445.868378), SIMDE_FLOAT32_C(558.828979) }, + { UINT32_C(446), UINT32_C(559) } }, + { { SIMDE_FLOAT32_C(83.968452), SIMDE_FLOAT32_C(140.023712) }, + { UINT32_C(84), UINT32_C(140) } }, + { { SIMDE_FLOAT32_C(230.921921), SIMDE_FLOAT32_C(235.137802) }, + { UINT32_C(231), UINT32_C(235) } }, + { { SIMDE_FLOAT32_C(367.292725), SIMDE_FLOAT32_C(815.052429) }, + { UINT32_C(367), UINT32_C(815) } }, + { { SIMDE_FLOAT32_C(13.168660), 
SIMDE_FLOAT32_C(406.672668) }, + { UINT32_C(13), UINT32_C(407) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1726,20 +1800,22 @@ test_simde_vcvtaq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { { { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(10000.0), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(10000.0), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, { INT32_MAX, INT32_MIN, INT32_C( 0), INT32_MAX } }, #endif - { { SIMDE_FLOAT32_C( 553.19), SIMDE_FLOAT32_C( -89.37), SIMDE_FLOAT32_C( -751.51), SIMDE_FLOAT32_C( 39.67) }, - { INT32_C( 553), -INT32_C( 89), -INT32_C( 752), INT32_C( 40) } }, - { { SIMDE_FLOAT32_C( 324.39), SIMDE_FLOAT32_C( 39.90), SIMDE_FLOAT32_C( 154.38), SIMDE_FLOAT32_C( -782.06) }, - { INT32_C( 324), INT32_C( 40), INT32_C( 154), -INT32_C( 782) } }, - { { SIMDE_FLOAT32_C( 683.78), SIMDE_FLOAT32_C( 860.43), SIMDE_FLOAT32_C( 258.08), SIMDE_FLOAT32_C( -431.46) }, - { INT32_C( 684), INT32_C( 860), INT32_C( 258), -INT32_C( 431) } }, - { { SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( -752.53), SIMDE_FLOAT32_C( 343.30), SIMDE_FLOAT32_C( -618.07) }, - { INT32_C( 5), -INT32_C( 753), INT32_C( 343), -INT32_C( 618) } }, - { { SIMDE_FLOAT32_C( -508.63), SIMDE_FLOAT32_C( 933.29), SIMDE_FLOAT32_C( 48.92), SIMDE_FLOAT32_C( 220.74) }, - { -INT32_C( 509), INT32_C( 933), INT32_C( 49), INT32_C( 221) } }, - { { SIMDE_FLOAT32_C( -447.64), SIMDE_FLOAT32_C( -181.80), SIMDE_FLOAT32_C( -962.01), SIMDE_FLOAT32_C( 914.94) }, - { -INT32_C( 448), -INT32_C( 182), -INT32_C( 962), INT32_C( 915) } }, - { { SIMDE_FLOAT32_C( -193.26), SIMDE_FLOAT32_C( 71.12), SIMDE_FLOAT32_C( 342.76), SIMDE_FLOAT32_C( -390.07) }, - { -INT32_C( 193), INT32_C( 71), INT32_C( 343), -INT32_C( 390) } } + { { SIMDE_FLOAT32_C(-100.235291), SIMDE_FLOAT32_C(963.126831), SIMDE_FLOAT32_C(-513.717896), SIMDE_FLOAT32_C(76.769287) }, + { -INT32_C(100), INT32_C(963), -INT32_C(514), INT32_C(77) } }, + { { SIMDE_FLOAT32_C(188.924072), SIMDE_FLOAT32_C(145.440186), 
SIMDE_FLOAT32_C(889.209717), SIMDE_FLOAT32_C(443.582153) }, + { INT32_C(189), INT32_C(145), INT32_C(889), INT32_C(444) } }, + { { SIMDE_FLOAT32_C(-530.063477), SIMDE_FLOAT32_C(316.464478), SIMDE_FLOAT32_C(-720.190491), SIMDE_FLOAT32_C(800.602661) }, + { -INT32_C(530), INT32_C(316), -INT32_C(720), INT32_C(801) } }, + { { SIMDE_FLOAT32_C(-236.361084), SIMDE_FLOAT32_C(499.799438), SIMDE_FLOAT32_C(686.013672), SIMDE_FLOAT32_C(172.975098) }, + { -INT32_C(236), INT32_C(500), INT32_C(686), INT32_C(173) } }, + { { SIMDE_FLOAT32_C(43.644165), SIMDE_FLOAT32_C(-327.279907), SIMDE_FLOAT32_C(-920.398865), SIMDE_FLOAT32_C(528.922852) }, + { INT32_C(44), -INT32_C(327), -INT32_C(920), INT32_C(529) } }, + { { SIMDE_FLOAT32_C(-676.315308), SIMDE_FLOAT32_C(-156.078674), SIMDE_FLOAT32_C(475.171509), SIMDE_FLOAT32_C(885.232666) }, + { -INT32_C(676), -INT32_C(156), INT32_C(475), INT32_C(885) } }, + { { SIMDE_FLOAT32_C(599.373413), SIMDE_FLOAT32_C(8.430664), SIMDE_FLOAT32_C(-142.695679), SIMDE_FLOAT32_C(235.751221) }, + { INT32_C(599), INT32_C(8), -INT32_C(143), INT32_C(236) } }, + { { SIMDE_FLOAT32_C(895.002075), SIMDE_FLOAT32_C(-806.336182), SIMDE_FLOAT32_C(-732.325745), SIMDE_FLOAT32_C(-389.401733) }, + { INT32_C(895), -INT32_C(806), -INT32_C(732), -INT32_C(389) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1761,20 +1837,22 @@ test_simde_vcvtaq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(10000.0), SIMDE_FLOAT32_C(-10000.0), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, #endif - { { SIMDE_FLOAT32_C( 553.19), SIMDE_FLOAT32_C( -89.37), SIMDE_FLOAT32_C( -751.51), SIMDE_FLOAT32_C( 39.67) }, - { UINT32_C( 553), UINT32_C( 0), UINT32_C( 0), UINT32_C( 40) } }, - { { SIMDE_FLOAT32_C( 324.39), SIMDE_FLOAT32_C( 39.90), SIMDE_FLOAT32_C( 154.38), SIMDE_FLOAT32_C( -782.06) }, - { UINT32_C( 324), UINT32_C( 40), UINT32_C( 154), UINT32_C( 0) } }, - { { 
SIMDE_FLOAT32_C( 683.78), SIMDE_FLOAT32_C( 860.43), SIMDE_FLOAT32_C( 258.08), SIMDE_FLOAT32_C( -431.46) }, - { UINT32_C( 684), UINT32_C( 860), UINT32_C( 258), UINT32_C( 0) } }, - { { SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( -752.53), SIMDE_FLOAT32_C( 343.30), SIMDE_FLOAT32_C( -618.07) }, - { UINT32_C( 5), UINT32_C( 0), UINT32_C( 343), UINT32_C( 0) } }, - { { SIMDE_FLOAT32_C( -508.63), SIMDE_FLOAT32_C( 933.29), SIMDE_FLOAT32_C( 48.92), SIMDE_FLOAT32_C( 220.74) }, - { UINT32_C( 0), UINT32_C( 933), UINT32_C( 49), UINT32_C( 221) } }, - { { SIMDE_FLOAT32_C( -447.64), SIMDE_FLOAT32_C( -181.80), SIMDE_FLOAT32_C( -962.01), SIMDE_FLOAT32_C( 914.94) }, - { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 915) } }, - { { SIMDE_FLOAT32_C( -193.26), SIMDE_FLOAT32_C( 71.12), SIMDE_FLOAT32_C( 342.76), SIMDE_FLOAT32_C( -390.07) }, - { UINT32_C( 0), UINT32_C( 71), UINT32_C( 343), UINT32_C( 0) } } + { { SIMDE_FLOAT32_C(99.796890), SIMDE_FLOAT32_C(640.625061), SIMDE_FLOAT32_C(761.249390), SIMDE_FLOAT32_C(134.496353) }, + { UINT32_C(100), UINT32_C(641), UINT32_C(761), UINT32_C(134) } }, + { { SIMDE_FLOAT32_C(740.153748), SIMDE_FLOAT32_C(226.072403), SIMDE_FLOAT32_C(458.142426), SIMDE_FLOAT32_C(312.975708) }, + { UINT32_C(740), UINT32_C(226), UINT32_C(458), UINT32_C(313) } }, + { { SIMDE_FLOAT32_C(881.748596), SIMDE_FLOAT32_C(315.416504), SIMDE_FLOAT32_C(657.340698), SIMDE_FLOAT32_C(492.805298) }, + { UINT32_C(882), UINT32_C(315), UINT32_C(657), UINT32_C(493) } }, + { { SIMDE_FLOAT32_C(27.446901), SIMDE_FLOAT32_C(904.086670), SIMDE_FLOAT32_C(857.025085), SIMDE_FLOAT32_C(677.571045) }, + { UINT32_C(27), UINT32_C(904), UINT32_C(857), UINT32_C(678) } }, + { { SIMDE_FLOAT32_C(666.073059), SIMDE_FLOAT32_C(988.718506), SIMDE_FLOAT32_C(51.321510), SIMDE_FLOAT32_C(353.845490) }, + { UINT32_C(666), UINT32_C(989), UINT32_C(51), UINT32_C(354) } }, + { { SIMDE_FLOAT32_C(307.715729), SIMDE_FLOAT32_C(75.778244), SIMDE_FLOAT32_C(748.057373), SIMDE_FLOAT32_C(533.695679) }, + { UINT32_C(308), 
UINT32_C(76), UINT32_C(748), UINT32_C(534) } }, + { { SIMDE_FLOAT32_C(949.232422), SIMDE_FLOAT32_C(163.359085), SIMDE_FLOAT32_C(946.573120), SIMDE_FLOAT32_C(713.519104) }, + { UINT32_C(949), UINT32_C(163), UINT32_C(947), UINT32_C(714) } }, + { { SIMDE_FLOAT32_C(592.152954), SIMDE_FLOAT32_C(751.258545), SIMDE_FLOAT32_C(645.332520), SIMDE_FLOAT32_C(894.986938) }, + { UINT32_C(592), UINT32_C(751), UINT32_C(645), UINT32_C(895) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1787,6 +1865,8 @@ test_simde_vcvtaq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_u16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_u32_f32) diff --git a/test/arm/neon/cvt_n.c b/test/arm/neon/cvt_n.c new file mode 100644 index 000000000..683910bb3 --- /dev/null +++ b/test/arm/neon/cvt_n.c @@ -0,0 +1,1045 @@ +#define SIMDE_TEST_ARM_NEON_INSN cvt_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/cvt_n.h" +#include "../../../simde/arm/neon/dup_n.h" + +static int +test_simde_vcvt_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + int16_t r3[4]; + int16_t r6[4]; + int16_t r10[4]; + int16_t r13[4]; + int16_t r16[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(1.1), }, + { -INT16_C(1), -INT16_C(38), INT16_C(79), INT16_C(8), }, + { -INT16_C(12), -INT16_C(307), INT16_C(633), INT16_C(70), }, + { -INT16_C(204), -INT16_C(4916), INT16_C(10136), INT16_C(1126), }, + { -INT16_C(1638), INT16_MIN, INT16_MAX, INT16_C(9008), }, + { -INT16_C(13104), INT16_MIN, INT16_MAX, INT16_MAX, } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_int16x4_t r3 = simde_vcvt_n_s16_f16(a, 3); + simde_int16x4_t r6 = 
simde_vcvt_n_s16_f16(a, 6); + simde_int16x4_t r10 = simde_vcvt_n_s16_f16(a, 10); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int16x4_t r13 = simde_vcvt_n_s16_f16(a, 13); + simde_int16x4_t r16 = simde_vcvt_n_s16_f16(a, 16); + #endif + + simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); + #endif + } + + return 0; +} + +static int +test_simde_vcvt_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[2]; + int32_t r3[2]; + int32_t r10[2]; + int32_t r16[2]; + int32_t r23[2]; + int32_t r32[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-733.4), SIMDE_FLOAT32_C(-808.5) }, + { -INT32_C(5867), -INT32_C(6468) }, + { -INT32_C(751001), -INT32_C(827904) }, + { -INT32_C(48064104), -INT32_C(52985856) }, + { INT32_MIN, INT32_MIN }, + { INT32_MIN, INT32_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_int32x2_t r3 = simde_vcvt_n_s32_f32(a, 3); + simde_int32x2_t r10 = simde_vcvt_n_s32_f32(a, 10); + simde_int32x2_t r16 = simde_vcvt_n_s32_f32(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int32x2_t r23 = simde_vcvt_n_s32_f32(a, 23); + simde_int32x2_t r32 = simde_vcvt_n_s32_f32(a, 32); + #endif + + simde_test_arm_neon_assert_equal_i32x2(r3, simde_vld1_s32(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i32x2(r10, simde_vld1_s32(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i32x2(r16, simde_vld1_s32(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i32x2(r23, simde_vld1_s32(test_vec[i].r23)); + 
simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); + #endif + } + + return 0; +} + +static int +test_simde_vcvt_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[1]; + int64_t r3[1]; + int64_t r17[1]; + int64_t r23[1]; + int64_t r38[1]; + int64_t r55[1]; + int64_t r64[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-9709.6) }, + { -INT64_C(77676) }, + { -INT64_C(1272656691) }, + { -INT64_C(81450028236) }, + { -INT64_C(2668954525263462) }, + { INT64_MIN }, + { INT64_MIN } }, + { { SIMDE_FLOAT64_C(8973.1) }, + { INT64_C(71784) }, + { INT64_C(1176122163) }, + { INT64_C(75271818444) }, + { INT64_C(2466506946799206) }, + { INT64_MAX }, + { INT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_int64x1_t r3 = simde_vcvt_n_s64_f64(a, 3); + simde_int64x1_t r17 = simde_vcvt_n_s64_f64(a, 17); + simde_int64x1_t r23 = simde_vcvt_n_s64_f64(a, 23); + simde_int64x1_t r38 = simde_vcvt_n_s64_f64(a, 38); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int64x1_t r55 = simde_vcvt_n_s64_f64(a, 55); + simde_int64x1_t r64 = simde_vcvt_n_s64_f64(a, 64); + #endif + + simde_test_arm_neon_assert_equal_i64x1(r3, simde_vld1_s64(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i64x1(r17, simde_vld1_s64(test_vec[i].r17)); + simde_test_arm_neon_assert_equal_i64x1(r23, simde_vld1_s64(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_i64x1(r38, simde_vld1_s64(test_vec[i].r38)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i64x1(r55, simde_vld1_s64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); + #endif + } + + return 0; +} + +static int +test_simde_vcvt_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + uint16_t r3[4]; + uint16_t r6[4]; + uint16_t r10[4]; + uint16_t r13[4]; + uint16_t r16[4]; + } test_vec[] = { + { { 
SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(3.2), }, + { UINT16_C(11), UINT16_C(72), UINT16_C(43), UINT16_C(25), }, + { UINT16_C(89), UINT16_C(582), UINT16_C(345), UINT16_C(204), }, + { UINT16_C(1434), UINT16_C(9320), UINT16_C(5528), UINT16_C(3276), }, + { UINT16_C(11472), UINT16_MAX, UINT16_C(44224), UINT16_C(26208), }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r3 = simde_vcvt_n_u16_f16(a, 3); + simde_uint16x4_t r6 = simde_vcvt_n_u16_f16(a, 6); + simde_uint16x4_t r10 = simde_vcvt_n_u16_f16(a, 10); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint16x4_t r13 = simde_vcvt_n_u16_f16(a, 13); + simde_uint16x4_t r16 = simde_vcvt_n_u16_f16(a, 16); + #endif + + simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); + #endif + } + + return 0; +} + +static int +test_simde_vcvt_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[2]; + uint32_t r3[2]; + uint32_t r10[2]; + uint32_t r16[2]; + uint32_t r23[2]; + uint32_t r32[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(731.2), SIMDE_FLOAT32_C(293.2) }, + { UINT32_C(5849), UINT32_C(2345) }, + { UINT32_C(748748), UINT32_C(300236) }, + { UINT32_C(47919924), UINT32_C(19215156) }, + { UINT32_MAX, UINT32_C(2459539968) }, + { UINT32_MAX, UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_uint32x2_t 
r3 = simde_vcvt_n_u32_f32(a, 3); + simde_uint32x2_t r10 = simde_vcvt_n_u32_f32(a, 10); + simde_uint32x2_t r16 = simde_vcvt_n_u32_f32(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint32x2_t r23 = simde_vcvt_n_u32_f32(a, 23); + simde_uint32x2_t r32 = simde_vcvt_n_u32_f32(a, 32); + #endif + + simde_test_arm_neon_assert_equal_u32x2(r3, simde_vld1_u32(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u32x2(r10, simde_vld1_u32(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u32x2(r16, simde_vld1_u32(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u32x2(r23, simde_vld1_u32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); + #endif + } + + return 0; +} + +static int +test_simde_vcvt_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[1]; + uint64_t r3[1]; + uint64_t r17[1]; + uint64_t r23[1]; + uint64_t r38[1]; + uint64_t r55[1]; + uint64_t r64[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(446.9) }, + { UINT64_C(3575) }, + { UINT64_C(58576076) }, + { UINT64_C(3748868915) }, + { UINT64_C(122842936613273) }, + { UINT64_C(16101269387774996480) }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C(3993.6) }, + { UINT64_C(31948) }, + { UINT64_C(523449139) }, + { UINT64_C(33500744908) }, + { UINT64_C(1097752409171558) }, + { UINT64_MAX }, + { UINT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_uint64x1_t r3 = simde_vcvt_n_u64_f64(a, 3); + simde_uint64x1_t r17 = simde_vcvt_n_u64_f64(a, 17); + simde_uint64x1_t r23 = simde_vcvt_n_u64_f64(a, 23); + simde_uint64x1_t r38 = simde_vcvt_n_u64_f64(a, 38); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint64x1_t r55 = simde_vcvt_n_u64_f64(a, 55); + simde_uint64x1_t r64 = simde_vcvt_n_u64_f64(a, 64); + #endif + + simde_test_arm_neon_assert_equal_u64x1(r3, simde_vld1_u64(test_vec[i].r3)); + 
simde_test_arm_neon_assert_equal_u64x1(r17, simde_vld1_u64(test_vec[i].r17)); + simde_test_arm_neon_assert_equal_u64x1(r23, simde_vld1_u64(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_u64x1(r38, simde_vld1_u64(test_vec[i].r38)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u64x1(r55, simde_vld1_u64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); + #endif + } + + return 0; +} + +static int +test_simde_vcvtq_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + int16_t r3[8]; + int16_t r6[8]; + int16_t r10[8]; + int16_t r13[8]; + int16_t r16[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-9.3), SIMDE_FLOAT16_VALUE(-4.4), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(6.9), SIMDE_FLOAT16_VALUE(-5.9), }, + { -INT16_C(5), -INT16_C(36), INT16_C(6), -INT16_C(74), -INT16_C(35), INT16_C(74), INT16_C(55), -INT16_C(47), }, + { -INT16_C(44), -INT16_C(288), INT16_C(51), -INT16_C(595), -INT16_C(281), INT16_C(595), INT16_C(441), -INT16_C(377), }, + { -INT16_C(717), -INT16_C(4608), INT16_C(819), -INT16_C(9520), -INT16_C(4504), INT16_C(9520), INT16_C(7064), -INT16_C(6040), }, + { -INT16_C(5736), INT16_MIN, INT16_C(6552), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN, }, + { INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN, } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_int16x8_t r3 = simde_vcvtq_n_s16_f16(a, 3); + simde_int16x8_t r6 = simde_vcvtq_n_s16_f16(a, 6); + simde_int16x8_t r10 = simde_vcvtq_n_s16_f16(a, 10); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int16x8_t r13 = simde_vcvtq_n_s16_f16(a, 13); + simde_int16x8_t r16 = simde_vcvtq_n_s16_f16(a, 16); + #endif + + simde_test_arm_neon_assert_equal_i16x8(r3, 
simde_vld1q_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); + #endif + } + + return 0; +} + +static int +test_simde_vcvtq_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[4]; + int32_t r3[4]; + int32_t r10[4]; + int32_t r16[4]; + int32_t r23[4]; + int32_t r32[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(422.6), SIMDE_FLOAT32_C(749.1), SIMDE_FLOAT32_C(-101.2), SIMDE_FLOAT32_C(-5.7) }, + { INT32_C(3380), INT32_C(5992), -INT32_C(809), -INT32_C(45) }, + { INT32_C(432742), INT32_C(767078), -INT32_C(103628), -INT32_C(5836) }, + { INT32_C(27695514), INT32_C(49093016), -INT32_C(6632243), -INT32_C(373555) }, + { INT32_MAX, INT32_MAX, -INT32_C(848927104), -INT32_C(47815064) }, + { INT32_MAX, INT32_MAX, INT32_MIN, INT32_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_int32x4_t r3 = simde_vcvtq_n_s32_f32(a, 3); + simde_int32x4_t r10 = simde_vcvtq_n_s32_f32(a, 10); + simde_int32x4_t r16 = simde_vcvtq_n_s32_f32(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int32x4_t r23 = simde_vcvtq_n_s32_f32(a, 23); + simde_int32x4_t r32 = simde_vcvtq_n_s32_f32(a, 32); + #endif + + simde_test_arm_neon_assert_equal_i32x4(r3, simde_vld1q_s32(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i32x4(r10, simde_vld1q_s32(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i32x4(r16, simde_vld1q_s32(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i32x4(r23, simde_vld1q_s32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_i32x4(r32, 
simde_vld1q_s32(test_vec[i].r32)); + #endif + } + + return 0; +} + +static int +test_simde_vcvtq_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[2]; + int64_t r3[2]; + int64_t r17[2]; + int64_t r23[2]; + int64_t r38[2]; + int64_t r55[2]; + int64_t r64[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-9709.6), SIMDE_FLOAT64_C(8973.1) }, + { -INT64_C(77676), INT64_C(71784) }, + { -INT64_C(1272656691), INT64_C(1176122163) }, + { -INT64_C(81450028236), INT64_C(75271818444) }, + { -INT64_C(2668954525263462), INT64_C(2466506946799206) }, + { INT64_MIN, INT64_MAX }, + { INT64_MIN, INT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_int64x2_t r3 = simde_vcvtq_n_s64_f64(a, 3); + simde_int64x2_t r17 = simde_vcvtq_n_s64_f64(a, 17); + simde_int64x2_t r23 = simde_vcvtq_n_s64_f64(a, 23); + simde_int64x2_t r38 = simde_vcvtq_n_s64_f64(a, 38); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int64x2_t r55 = simde_vcvtq_n_s64_f64(a, 55); + simde_int64x2_t r64 = simde_vcvtq_n_s64_f64(a, 64); + #endif + + simde_test_arm_neon_assert_equal_i64x2(r3, simde_vld1q_s64(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i64x2(r17, simde_vld1q_s64(test_vec[i].r17)); + simde_test_arm_neon_assert_equal_i64x2(r23, simde_vld1q_s64(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_i64x2(r38, simde_vld1q_s64(test_vec[i].r38)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i64x2(r55, simde_vld1q_s64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); + #endif + } + + return 0; +} + +static int +test_simde_vcvtq_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + uint16_t r3[8]; + uint16_t r6[8]; + uint16_t r10[8]; + uint16_t r13[8]; + uint16_t r16[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(0.6), 
SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(1.8), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(7.5), }, + { UINT16_C(38), UINT16_C(11), UINT16_C(4), UINT16_C(20), UINT16_C(14), UINT16_C(52), UINT16_C(78), UINT16_C(60), }, + { UINT16_C(307), UINT16_C(89), UINT16_C(38), UINT16_C(166), UINT16_C(115), UINT16_C(416), UINT16_C(627), UINT16_C(480), }, + { UINT16_C(4916), UINT16_C(1434), UINT16_C(614), UINT16_C(2662), UINT16_C(1843), UINT16_C(6656), UINT16_C(10032), UINT16_C(7680), }, + { UINT16_C(39328), UINT16_C(11472), UINT16_C(4916), UINT16_C(21296), UINT16_C(14744), UINT16_C(53248), UINT16_MAX, UINT16_C(61440), }, + { UINT16_MAX, UINT16_MAX, UINT16_C(39328), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r3 = simde_vcvtq_n_u16_f16(a, 3); + simde_uint16x8_t r6 = simde_vcvtq_n_u16_f16(a, 6); + simde_uint16x8_t r10 = simde_vcvtq_n_u16_f16(a, 10); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint16x8_t r13 = simde_vcvtq_n_u16_f16(a, 13); + simde_uint16x8_t r16 = simde_vcvtq_n_u16_f16(a, 16); + #endif + + simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + #endif + } + + return 0; +} + +static int +test_simde_vcvtq_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[4]; + uint32_t r3[4]; + uint32_t r10[4]; + uint32_t r16[4]; + uint32_t r23[4]; + uint32_t r32[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(997.5), SIMDE_FLOAT32_C(825.7), 
SIMDE_FLOAT32_C(684.9), SIMDE_FLOAT32_C(227.4) }, + { UINT32_C(7980), UINT32_C(6605), UINT32_C(5479), UINT32_C(1819) }, + { UINT32_C(1021440), UINT32_C(845516), UINT32_C(701337), UINT32_C(232857) }, + { UINT32_C(65372160), UINT32_C(54113076), UINT32_C(44885608), UINT32_C(14902886) }, + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C(1907569408) }, + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_uint32x4_t r3 = simde_vcvtq_n_u32_f32(a, 3); + simde_uint32x4_t r10 = simde_vcvtq_n_u32_f32(a, 10); + simde_uint32x4_t r16 = simde_vcvtq_n_u32_f32(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint32x4_t r23 = simde_vcvtq_n_u32_f32(a, 23); + simde_uint32x4_t r32 = simde_vcvtq_n_u32_f32(a, 32); + #endif + + simde_test_arm_neon_assert_equal_u32x4(r3, simde_vld1q_u32(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u32x4(r10, simde_vld1q_u32(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u32x4(r16, simde_vld1q_u32(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u32x4(r23, simde_vld1q_u32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + #endif + } + + return 0; +} + +static int +test_simde_vcvtq_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[2]; + uint64_t r3[2]; + uint64_t r17[2]; + uint64_t r23[2]; + uint64_t r38[2]; + uint64_t r55[2]; + uint64_t r64[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(446.9), SIMDE_FLOAT64_C(3993.6) }, + { UINT64_C(3575), UINT64_C(31948) }, + { UINT64_C(58576076), UINT64_C(523449139) }, + { UINT64_C(3748868915), UINT64_C(33500744908) }, + { UINT64_C(122842936613273), UINT64_C(1097752409171558) }, + { UINT64_C(16101269387774996480), UINT64_MAX }, + { UINT64_MAX, UINT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_uint64x2_t r3 = simde_vcvtq_n_u64_f64(a, 3); + simde_uint64x2_t r17 = simde_vcvtq_n_u64_f64(a, 17); + simde_uint64x2_t r23 = simde_vcvtq_n_u64_f64(a, 23); + simde_uint64x2_t r38 = simde_vcvtq_n_u64_f64(a, 38); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint64x2_t r55 = simde_vcvtq_n_u64_f64(a, 55); + simde_uint64x2_t r64 = simde_vcvtq_n_u64_f64(a, 64); + #endif + + simde_test_arm_neon_assert_equal_u64x2(r3, simde_vld1q_u64(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u64x2(r17, simde_vld1q_u64(test_vec[i].r17)); + simde_test_arm_neon_assert_equal_u64x2(r23, simde_vld1q_u64(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_u64x2(r38, simde_vld1q_u64(test_vec[i].r38)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u64x2(r55, simde_vld1q_u64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); + #endif + } + + return 0; +} + +static int +test_simde_vcvt_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint16_t a[4]; + simde_float16_t r3[4]; + simde_float16_t r6[4]; + simde_float16_t r10[4]; + simde_float16_t r13[4]; + simde_float16_t r16[4]; + } test_vec[] = { + { { UINT16_C(19849), UINT16_C(26147), UINT16_C(40838), UINT16_C(40781) }, + { SIMDE_FLOAT16_VALUE(2482.0), SIMDE_FLOAT16_VALUE(3268.4), SIMDE_FLOAT16_VALUE(5104.8), SIMDE_FLOAT16_VALUE(5097.6) }, + { SIMDE_FLOAT16_VALUE(310.2), SIMDE_FLOAT16_VALUE(408.5), SIMDE_FLOAT16_VALUE(638.1), SIMDE_FLOAT16_VALUE(637.2) }, + { SIMDE_FLOAT16_VALUE(19.4), SIMDE_FLOAT16_VALUE(25.5), SIMDE_FLOAT16_VALUE(39.9), SIMDE_FLOAT16_VALUE(39.8) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(5.0) }, + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.6) } }, + { { UINT16_C(10037), UINT16_C(52658), UINT16_C(27371), UINT16_C(28364) }, + 
{ SIMDE_FLOAT16_VALUE(1254.6), SIMDE_FLOAT16_VALUE(6582.3), SIMDE_FLOAT16_VALUE(3421.4), SIMDE_FLOAT16_VALUE(3545.5) }, + { SIMDE_FLOAT16_VALUE(156.875), SIMDE_FLOAT16_VALUE(822.8), SIMDE_FLOAT16_VALUE(427.7), SIMDE_FLOAT16_VALUE(443.2) }, + { SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(51.4), SIMDE_FLOAT16_VALUE(26.7), SIMDE_FLOAT16_VALUE(27.7) }, + { SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(3.5) }, + { SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.4) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); + simde_float16x4_t r3 = simde_vcvt_n_f16_u16(a, 3); + simde_float16x4_t r6 = simde_vcvt_n_f16_u16(a, 6); + simde_float16x4_t r10 = simde_vcvt_n_f16_u16(a, 10); + simde_float16x4_t r13 = simde_vcvt_n_f16_u16(a, 13); + simde_float16x4_t r16 = simde_vcvt_n_f16_u16(a, 16); + + simde_test_arm_neon_assert_equal_f16x4(r3, simde_vld1_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x4(r6, simde_vld1_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x4(r10, simde_vld1_f16(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f16x4(r13, simde_vld1_f16(test_vec[i].r13), 1); + simde_test_arm_neon_assert_equal_f16x4(r16, simde_vld1_f16(test_vec[i].r16), 1); + } + + return 0; +} + +static int +test_simde_vcvt_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int16_t a[4]; + simde_float16_t r3[4]; + simde_float16_t r6[4]; + simde_float16_t r10[4]; + simde_float16_t r13[4]; + simde_float16_t r16[4]; + } test_vec[] = { + { { INT16_C(-1573), INT16_C(-19221), INT16_C(23775), INT16_C(-21379) }, + { SIMDE_FLOAT16_VALUE(-196.625), SIMDE_FLOAT16_VALUE(-2402.6), SIMDE_FLOAT16_VALUE(2971.9), SIMDE_FLOAT16_VALUE(-2672.4) }, + { SIMDE_FLOAT16_VALUE(-24.578125), SIMDE_FLOAT16_VALUE(-300.25), SIMDE_FLOAT16_VALUE(371.5), SIMDE_FLOAT16_VALUE(-334.0) }, + { 
SIMDE_FLOAT16_VALUE(-1.536133), SIMDE_FLOAT16_VALUE(-18.765625), SIMDE_FLOAT16_VALUE(23.218750), SIMDE_FLOAT16_VALUE(-20.8750) }, + { SIMDE_FLOAT16_VALUE(-0.192017), SIMDE_FLOAT16_VALUE(-2.345703), SIMDE_FLOAT16_VALUE(2.902344), SIMDE_FLOAT16_VALUE(-2.609375) }, + { SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(-0.293213), SIMDE_FLOAT16_VALUE(0.362793), SIMDE_FLOAT16_VALUE(-0.326172) } }, + { { INT16_C(-19672), INT16_C(2663), INT16_C(31268), INT16_C(-11631) }, + { SIMDE_FLOAT16_VALUE(-2460.0), SIMDE_FLOAT16_VALUE(333.0), SIMDE_FLOAT16_VALUE(3908.5), SIMDE_FLOAT16_VALUE(-1454.0) }, + { SIMDE_FLOAT16_VALUE(-307.5), SIMDE_FLOAT16_VALUE(41.625), SIMDE_FLOAT16_VALUE(488.5), SIMDE_FLOAT16_VALUE(-181.75) }, + { SIMDE_FLOAT16_VALUE(-19.21875), SIMDE_FLOAT16_VALUE(2.601562), SIMDE_FLOAT16_VALUE(30.531250), SIMDE_FLOAT16_VALUE(-11.359375) }, + { SIMDE_FLOAT16_VALUE(-2.402344), SIMDE_FLOAT16_VALUE(0.325195), SIMDE_FLOAT16_VALUE(3.816406), SIMDE_FLOAT16_VALUE(-1.419922) }, + { SIMDE_FLOAT16_VALUE(-0.300293), SIMDE_FLOAT16_VALUE(0.040649), SIMDE_FLOAT16_VALUE(0.477051), SIMDE_FLOAT16_VALUE(-0.177490) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_float16x4_t r3 = simde_vcvt_n_f16_s16(a, 3); + simde_float16x4_t r6 = simde_vcvt_n_f16_s16(a, 6); + simde_float16x4_t r10 = simde_vcvt_n_f16_s16(a, 10); + simde_float16x4_t r13 = simde_vcvt_n_f16_s16(a, 13); + simde_float16x4_t r16 = simde_vcvt_n_f16_s16(a, 16); + + simde_test_arm_neon_assert_equal_f16x4(r3, simde_vld1_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x4(r6, simde_vld1_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x4(r10, simde_vld1_f16(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f16x4(r13, simde_vld1_f16(test_vec[i].r13), 1); + simde_test_arm_neon_assert_equal_f16x4(r16, simde_vld1_f16(test_vec[i].r16), 1); + } + + return 0; +} + +static int +test_simde_vcvtq_n_f16_u16 
(SIMDE_MUNIT_TEST_ARGS) { + struct { + uint16_t a[8]; + simde_float16_t r3[8]; + simde_float16_t r6[8]; + simde_float16_t r10[8]; + simde_float16_t r13[8]; + simde_float16_t r16[8]; + } test_vec[] = { + { { UINT16_C(19849), UINT16_C(26147), UINT16_C(40838), UINT16_C(40781), UINT16_C(10037), UINT16_C(52658), UINT16_C(27371), UINT16_C(28364) }, + { SIMDE_FLOAT16_VALUE(2481.1), SIMDE_FLOAT16_VALUE(3268.4), SIMDE_FLOAT16_VALUE(5104.8), SIMDE_FLOAT16_VALUE(5097.6), SIMDE_FLOAT16_VALUE(1254.6), SIMDE_FLOAT16_VALUE(6582.3), SIMDE_FLOAT16_VALUE(3421.4), SIMDE_FLOAT16_VALUE(3545.5) }, + { SIMDE_FLOAT16_VALUE(310.2), SIMDE_FLOAT16_VALUE(408.5), SIMDE_FLOAT16_VALUE(638.1), SIMDE_FLOAT16_VALUE(637.2), SIMDE_FLOAT16_VALUE(156.875), SIMDE_FLOAT16_VALUE(822.8), SIMDE_FLOAT16_VALUE(427.7), SIMDE_FLOAT16_VALUE(443.2) }, + { SIMDE_FLOAT16_VALUE(19.4), SIMDE_FLOAT16_VALUE(25.5), SIMDE_FLOAT16_VALUE(39.9), SIMDE_FLOAT16_VALUE(39.8), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(51.4), SIMDE_FLOAT16_VALUE(26.7), SIMDE_FLOAT16_VALUE(27.7) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(3.5) }, + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.4) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_float16x8_t r3 = simde_vcvtq_n_f16_u16(a, 3); + simde_float16x8_t r6 = simde_vcvtq_n_f16_u16(a, 6); + simde_float16x8_t r10 = simde_vcvtq_n_f16_u16(a, 10); + simde_float16x8_t r13 = simde_vcvtq_n_f16_u16(a, 13); + simde_float16x8_t r16 = simde_vcvtq_n_f16_u16(a, 16); + + simde_test_arm_neon_assert_equal_f16x8(r3, simde_vld1q_f16(test_vec[i].r3), 1); + 
simde_test_arm_neon_assert_equal_f16x8(r6, simde_vld1q_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x8(r10, simde_vld1q_f16(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f16x8(r13, simde_vld1q_f16(test_vec[i].r13), 1); + simde_test_arm_neon_assert_equal_f16x8(r16, simde_vld1q_f16(test_vec[i].r16), 1); + } + + return 0; +} + +static int +test_simde_vcvtq_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int16_t a[8]; + simde_float16_t r3[8]; + simde_float16_t r6[8]; + simde_float16_t r10[8]; + simde_float16_t r13[8]; + simde_float16_t r16[8]; + } test_vec[] = { + { { INT16_C(-1573), INT16_C(-19221), INT16_C(23775), INT16_C(-21379), INT16_C(-19672), INT16_C(2663), INT16_C(31268), INT16_C(-11631) }, + { SIMDE_FLOAT16_VALUE(-196.625), SIMDE_FLOAT16_VALUE(-2402.6), SIMDE_FLOAT16_VALUE(2971.9), SIMDE_FLOAT16_VALUE(-2672.4), SIMDE_FLOAT16_VALUE(-2460.0), SIMDE_FLOAT16_VALUE(333.0), SIMDE_FLOAT16_VALUE(3908.5), SIMDE_FLOAT16_VALUE(-1454.0) }, + { SIMDE_FLOAT16_VALUE(-24.578125), SIMDE_FLOAT16_VALUE(-300.25), SIMDE_FLOAT16_VALUE(371.5), SIMDE_FLOAT16_VALUE(-334.0), SIMDE_FLOAT16_VALUE(-307.5), SIMDE_FLOAT16_VALUE(41.625), SIMDE_FLOAT16_VALUE(488.5), SIMDE_FLOAT16_VALUE(-181.75) }, + { SIMDE_FLOAT16_VALUE(-1.536133), SIMDE_FLOAT16_VALUE(-18.765625), SIMDE_FLOAT16_VALUE(23.218750), SIMDE_FLOAT16_VALUE(-20.8750), SIMDE_FLOAT16_VALUE(-19.21875), SIMDE_FLOAT16_VALUE(2.601562), SIMDE_FLOAT16_VALUE(30.531250), SIMDE_FLOAT16_VALUE(-11.359375) }, + { SIMDE_FLOAT16_VALUE(-0.192017), SIMDE_FLOAT16_VALUE(-2.345703), SIMDE_FLOAT16_VALUE(2.902344), SIMDE_FLOAT16_VALUE(-2.609375), SIMDE_FLOAT16_VALUE(-2.402344), SIMDE_FLOAT16_VALUE(0.325195), SIMDE_FLOAT16_VALUE(3.816406), SIMDE_FLOAT16_VALUE(-1.419922) }, + { SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(-0.293213), SIMDE_FLOAT16_VALUE(0.362793), SIMDE_FLOAT16_VALUE(-0.326172), SIMDE_FLOAT16_VALUE(-0.300293), SIMDE_FLOAT16_VALUE(0.040649), SIMDE_FLOAT16_VALUE(0.477051), SIMDE_FLOAT16_VALUE(-0.177490) } }, + }; 
+ + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_float16x8_t r3 = simde_vcvtq_n_f16_s16(a, 3); + simde_float16x8_t r6 = simde_vcvtq_n_f16_s16(a, 6); + simde_float16x8_t r10 = simde_vcvtq_n_f16_s16(a, 10); + simde_float16x8_t r13 = simde_vcvtq_n_f16_s16(a, 13); + simde_float16x8_t r16 = simde_vcvtq_n_f16_s16(a, 16); + + simde_test_arm_neon_assert_equal_f16x8(r3, simde_vld1q_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x8(r6, simde_vld1q_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x8(r10, simde_vld1q_f16(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f16x8(r13, simde_vld1q_f16(test_vec[i].r13), 1); + simde_test_arm_neon_assert_equal_f16x8(r16, simde_vld1q_f16(test_vec[i].r16), 1); + } + + return 0; +} + +static int +test_simde_vcvtq_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int32_t a[4]; + simde_float32 r3[4]; + simde_float32 r10[4]; + simde_float32 r16[4]; + simde_float32 r23[4]; + simde_float32 r32[4]; + } test_vec[] = { + { { -INT32_C(1577698352), INT32_C(1627417640), INT32_C(1166530302), -INT32_C(158461010) }, + { SIMDE_FLOAT32_C(-197212288.0), SIMDE_FLOAT32_C(203427200.0), SIMDE_FLOAT32_C(145816288.0), SIMDE_FLOAT32_C(-19807626.0) }, + { SIMDE_FLOAT32_C(-1540721.0), SIMDE_FLOAT32_C(1589275.0), SIMDE_FLOAT32_C(1139189.75), SIMDE_FLOAT32_C(-154747.07) }, + { SIMDE_FLOAT32_C(-24073.76), SIMDE_FLOAT32_C(24832.42), SIMDE_FLOAT32_C(17799.83), SIMDE_FLOAT32_C(-2417.92) }, + { SIMDE_FLOAT32_C(-188.07), SIMDE_FLOAT32_C(194.003), SIMDE_FLOAT32_C(139.06), SIMDE_FLOAT32_C(-18.89) }, + { SIMDE_FLOAT32_C(-0.37), SIMDE_FLOAT32_C(0.38), SIMDE_FLOAT32_C(0.27), SIMDE_FLOAT32_C(-0.04) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_float32x4_t r3 = simde_vcvtq_n_f32_s32(a, 3); + simde_float32x4_t r10 = simde_vcvtq_n_f32_s32(a, 10); + 
simde_float32x4_t r16 = simde_vcvtq_n_f32_s32(a, 16); + simde_float32x4_t r23 = simde_vcvtq_n_f32_s32(a, 23); + simde_float32x4_t r32 = simde_vcvtq_n_f32_s32(a, 32); + + simde_test_arm_neon_assert_equal_f32x4(r3, simde_vld1q_f32(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f32x4(r10, simde_vld1q_f32(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f32x4(r16, simde_vld1q_f32(test_vec[i].r16), 1); + simde_test_arm_neon_assert_equal_f32x4(r23, simde_vld1q_f32(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f32x4(r32, simde_vld1q_f32(test_vec[i].r32), 1); + } + + return 0; +} + +static int +test_simde_vcvt_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int32_t a[2]; + simde_float32 r3[2]; + simde_float32 r10[2]; + simde_float32 r16[2]; + simde_float32 r23[2]; + simde_float32 r32[2]; + } test_vec[] = { + { { -INT32_C(1577698352), INT32_C(1627417640) }, + { SIMDE_FLOAT32_C(-197212288.0), SIMDE_FLOAT32_C(203427200.0) }, + { SIMDE_FLOAT32_C(-1540721.0), SIMDE_FLOAT32_C(1589275.0) }, + { SIMDE_FLOAT32_C(-24073.76), SIMDE_FLOAT32_C(24832.42) }, + { SIMDE_FLOAT32_C(-188.07), SIMDE_FLOAT32_C(194.003) }, + { SIMDE_FLOAT32_C(-0.37), SIMDE_FLOAT32_C(0.38) } }, + { { INT32_C(1166530302), -INT32_C(158461010) }, + { SIMDE_FLOAT32_C(145816288.0), SIMDE_FLOAT32_C(-19807626.0) }, + { SIMDE_FLOAT32_C(1139189.75), SIMDE_FLOAT32_C(-154747.07) }, + { SIMDE_FLOAT32_C(17799.83), SIMDE_FLOAT32_C(-2417.92) }, + { SIMDE_FLOAT32_C(139.06), SIMDE_FLOAT32_C(-18.89) }, + { SIMDE_FLOAT32_C(0.27), SIMDE_FLOAT32_C(-0.04) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_float32x2_t r3 = simde_vcvt_n_f32_s32(a, 3); + simde_float32x2_t r10 = simde_vcvt_n_f32_s32(a, 10); + simde_float32x2_t r16 = simde_vcvt_n_f32_s32(a, 16); + simde_float32x2_t r23 = simde_vcvt_n_f32_s32(a, 23); + simde_float32x2_t r32 = simde_vcvt_n_f32_s32(a, 32); + + 
simde_test_arm_neon_assert_equal_f32x2(r3, simde_vld1_f32(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f32x2(r10, simde_vld1_f32(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f32x2(r16, simde_vld1_f32(test_vec[i].r16), 1); + simde_test_arm_neon_assert_equal_f32x2(r23, simde_vld1_f32(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f32x2(r32, simde_vld1_f32(test_vec[i].r32), 1); + } + + return 0; +} + +static int +test_simde_vcvt_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint64_t a[1]; + simde_float64 r3[1]; + simde_float64 r17[1]; + simde_float64 r23[1]; + simde_float64 r38[1]; + simde_float64 r55[1]; + simde_float64 r64[1]; + } test_vec[] = { + { { UINT64_C(1686065688) }, + { SIMDE_FLOAT64_C(210758211.000000) }, + { SIMDE_FLOAT64_C(12863.660000) }, + { SIMDE_FLOAT64_C(200.990000) }, + { SIMDE_FLOAT64_C(0.010000) }, + { SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(0.000000) } }, + { { UINT64_C(763499258) }, + { SIMDE_FLOAT64_C(95437407.250000) }, + { SIMDE_FLOAT64_C(5825.040000) }, + { SIMDE_FLOAT64_C(91.020000) }, + { SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(0.000000) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); + simde_float64x1_t r3 = simde_vcvt_n_f64_u64(a, 3); + simde_float64x1_t r17 = simde_vcvt_n_f64_u64(a, 17); + simde_float64x1_t r23 = simde_vcvt_n_f64_u64(a, 23); + simde_float64x1_t r38 = simde_vcvt_n_f64_u64(a, 38); + simde_float64x1_t r55 = simde_vcvt_n_f64_u64(a, 55); + simde_float64x1_t r64 = simde_vcvt_n_f64_u64(a, 64); + + simde_test_arm_neon_assert_equal_f64x1(r3, simde_vld1_f64(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f64x1(r17, simde_vld1_f64(test_vec[i].r17), 1); + simde_test_arm_neon_assert_equal_f64x1(r23, simde_vld1_f64(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f64x1(r38, simde_vld1_f64(test_vec[i].r38), 1); + 
simde_test_arm_neon_assert_equal_f64x1(r55, simde_vld1_f64(test_vec[i].r55), 1); + simde_test_arm_neon_assert_equal_f64x1(r64, simde_vld1_f64(test_vec[i].r64), 1); + } + + return 0; +} + +/* Eric: Skip this function since it will trigger a compiler error when using i686-linux-gnu-g++-11. +static int +test_simde_vcvtq_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint64_t a[2]; + simde_float64 r3[2]; + simde_float64 r17[2]; + simde_float64 r23[2]; + simde_float64 r38[2]; + simde_float64 r55[2]; + simde_float64 r64[2]; + } test_vec[] = { + { { UINT64_C(18446744073618801398), UINT64_C(1801750886) }, + { SIMDE_FLOAT64_C(2305843009202350080.000000), SIMDE_FLOAT64_C(225218860.750000) }, + { SIMDE_FLOAT64_C(140737488354635.625000), SIMDE_FLOAT64_C(13746.270000) }, + { SIMDE_FLOAT64_C(2199023255541.179932), SIMDE_FLOAT64_C(214.790000) }, + { SIMDE_FLOAT64_C(67108864.000000), SIMDE_FLOAT64_C(0.010000) }, + { SIMDE_FLOAT64_C(512.000000), SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(1.000000), SIMDE_FLOAT64_C(0.000000) } }, + { { UINT64_C(18446744072095273152), UINT64_C(34887362) }, + { SIMDE_FLOAT64_C(2305843009011909376.000000), SIMDE_FLOAT64_C(4360920.250000) }, + { SIMDE_FLOAT64_C(140737488343012.046875), SIMDE_FLOAT64_C(266.170000) }, + { SIMDE_FLOAT64_C(2199023255359.560059), SIMDE_FLOAT64_C(4.160000) }, + { SIMDE_FLOAT64_C(67108863.990000), SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(512.000000), SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(1.000000), SIMDE_FLOAT64_C(0.000000) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_float64x2_t r3 = simde_vcvtq_n_f64_u64(a, 3); + simde_float64x2_t r17 = simde_vcvtq_n_f64_u64(a, 17); + simde_float64x2_t r23 = simde_vcvtq_n_f64_u64(a, 23); + simde_float64x2_t r38 = simde_vcvtq_n_f64_u64(a, 38); + simde_float64x2_t r55 = simde_vcvtq_n_f64_u64(a, 55); + simde_float64x2_t r64 = simde_vcvtq_n_f64_u64(a, 64); + + 
simde_test_arm_neon_assert_equal_f64x2(r3, simde_vld1q_f64(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f64x2(r17, simde_vld1q_f64(test_vec[i].r17), 1); + simde_test_arm_neon_assert_equal_f64x2(r23, simde_vld1q_f64(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f64x2(r38, simde_vld1q_f64(test_vec[i].r38), 1); + simde_test_arm_neon_assert_equal_f64x2(r55, simde_vld1q_f64(test_vec[i].r55), 1); + simde_test_arm_neon_assert_equal_f64x2(r64, simde_vld1q_f64(test_vec[i].r64), 1); + } + + return 0; +} +*/ + +static int +test_simde_vcvt_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int64_t a[1]; + simde_float64 r3[1]; + simde_float64 r17[1]; + simde_float64 r23[1]; + simde_float64 r38[1]; + simde_float64 r55[1]; + simde_float64 r64[1]; + } test_vec[] = { + { { -INT64_C(430855472) }, + { SIMDE_FLOAT64_C(-53856934.000000) }, + { SIMDE_FLOAT64_C(-3287.170000) }, + { SIMDE_FLOAT64_C(-51.360000) }, + { SIMDE_FLOAT64_C(-0.000000) }, + { SIMDE_FLOAT64_C(-0.000000) }, + { SIMDE_FLOAT64_C(-0.000000) } }, + { { INT64_C(163557546) }, + { SIMDE_FLOAT64_C(20444693.250000) }, + { SIMDE_FLOAT64_C(1247.850000) }, + { SIMDE_FLOAT64_C(19.500000) }, + { SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(0.000000) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); + simde_float64x1_t r3 = simde_vcvt_n_f64_s64(a, 3); + simde_float64x1_t r17 = simde_vcvt_n_f64_s64(a, 17); + simde_float64x1_t r23 = simde_vcvt_n_f64_s64(a, 23); + simde_float64x1_t r38 = simde_vcvt_n_f64_s64(a, 38); + simde_float64x1_t r55 = simde_vcvt_n_f64_s64(a, 55); + simde_float64x1_t r64 = simde_vcvt_n_f64_s64(a, 64); + + simde_test_arm_neon_assert_equal_f64x1(r3, simde_vld1_f64(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f64x1(r17, simde_vld1_f64(test_vec[i].r17), 1); + simde_test_arm_neon_assert_equal_f64x1(r23, simde_vld1_f64(test_vec[i].r23), 1); + 
simde_test_arm_neon_assert_equal_f64x1(r38, simde_vld1_f64(test_vec[i].r38), 1); + simde_test_arm_neon_assert_equal_f64x1(r55, simde_vld1_f64(test_vec[i].r55), 1); + simde_test_arm_neon_assert_equal_f64x1(r64, simde_vld1_f64(test_vec[i].r64), 1); + } + + return 0; +} + +static int +test_simde_vcvtq_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int64_t a[2]; + simde_float64 r3[2]; + simde_float64 r17[2]; + simde_float64 r23[2]; + simde_float64 r38[2]; + simde_float64 r55[2]; + simde_float64 r64[2]; + } test_vec[] = { + { { -INT64_C(430855472), INT64_C(163557546) }, + { SIMDE_FLOAT64_C(-53856934.000000), SIMDE_FLOAT64_C(20444693.250000) }, + { SIMDE_FLOAT64_C(-3287.170000), SIMDE_FLOAT64_C(1247.850000) }, + { SIMDE_FLOAT64_C(-51.360000), SIMDE_FLOAT64_C(19.500000) }, + { SIMDE_FLOAT64_C(-0.000000), SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(-0.000000), SIMDE_FLOAT64_C(0.000000) }, + { SIMDE_FLOAT64_C(-0.000000), SIMDE_FLOAT64_C(0.000000) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_float64x2_t r3 = simde_vcvtq_n_f64_s64(a, 3); + simde_float64x2_t r17 = simde_vcvtq_n_f64_s64(a, 17); + simde_float64x2_t r23 = simde_vcvtq_n_f64_s64(a, 23); + simde_float64x2_t r38 = simde_vcvtq_n_f64_s64(a, 38); + simde_float64x2_t r55 = simde_vcvtq_n_f64_s64(a, 55); + simde_float64x2_t r64 = simde_vcvtq_n_f64_s64(a, 64); + + simde_test_arm_neon_assert_equal_f64x2(r3, simde_vld1q_f64(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f64x2(r17, simde_vld1q_f64(test_vec[i].r17), 1); + simde_test_arm_neon_assert_equal_f64x2(r23, simde_vld1q_f64(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f64x2(r38, simde_vld1q_f64(test_vec[i].r38), 1); + simde_test_arm_neon_assert_equal_f64x2(r55, simde_vld1q_f64(test_vec[i].r55), 1); + simde_test_arm_neon_assert_equal_f64x2(r64, simde_vld1q_f64(test_vec[i].r64), 1); + } + + return 0; +} + +static int 
+test_simde_vcvtq_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint32_t a[4]; + simde_float32 r3[4]; + simde_float32 r10[4]; + simde_float32 r16[4]; + simde_float32 r23[4]; + simde_float32 r32[4]; + } test_vec[] = { + { { UINT32_C(2614615940), UINT32_C(11008892), UINT32_C(1049754994), UINT32_C(357707956) }, + { SIMDE_FLOAT32_C(326826992.500000), SIMDE_FLOAT32_C(1376111.500000), SIMDE_FLOAT32_C(131219374.250000), SIMDE_FLOAT32_C(44713494.500000) }, + { SIMDE_FLOAT32_C(2553336.0), SIMDE_FLOAT32_C(10750.87), SIMDE_FLOAT32_C(1025151.375), SIMDE_FLOAT32_C(349324.1875) }, + { SIMDE_FLOAT32_C(39895.87), SIMDE_FLOAT32_C(167.98), SIMDE_FLOAT32_C(16017.99), SIMDE_FLOAT32_C(5458.19) }, + { SIMDE_FLOAT32_C(311.686), SIMDE_FLOAT32_C(1.312), SIMDE_FLOAT32_C(125.14), SIMDE_FLOAT32_C(42.642) }, + { SIMDE_FLOAT32_C(0.610000), SIMDE_FLOAT32_C(0.000000), SIMDE_FLOAT32_C(0.240000), SIMDE_FLOAT32_C(0.080000) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_float32x4_t r3 = simde_vcvtq_n_f32_u32(a, 3); + simde_float32x4_t r10 = simde_vcvtq_n_f32_u32(a, 10); + simde_float32x4_t r16 = simde_vcvtq_n_f32_u32(a, 16); + simde_float32x4_t r23 = simde_vcvtq_n_f32_u32(a, 23); + simde_float32x4_t r32 = simde_vcvtq_n_f32_u32(a, 32); + + simde_test_arm_neon_assert_equal_f32x4(r3, simde_vld1q_f32(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f32x4(r10, simde_vld1q_f32(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f32x4(r16, simde_vld1q_f32(test_vec[i].r16), 1); + simde_test_arm_neon_assert_equal_f32x4(r23, simde_vld1q_f32(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f32x4(r32, simde_vld1q_f32(test_vec[i].r32), 1); + } + + return 0; +} + +static int +test_simde_vcvt_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint32_t a[2]; + simde_float32 r3[2]; + simde_float32 r10[2]; + simde_float32 r16[2]; + simde_float32 r23[2]; + simde_float32 r32[2]; + } test_vec[] = { 
+ { { UINT32_C(2614615940), UINT32_C(11008892) }, + { SIMDE_FLOAT32_C(326826992.500000), SIMDE_FLOAT32_C(1376111.500000) }, + { SIMDE_FLOAT32_C(2553336.0), SIMDE_FLOAT32_C(10750.87) }, + { SIMDE_FLOAT32_C(39895.87), SIMDE_FLOAT32_C(167.98) }, + { SIMDE_FLOAT32_C(311.686), SIMDE_FLOAT32_C(1.312) }, + { SIMDE_FLOAT32_C(0.610000), SIMDE_FLOAT32_C(0.000000) } }, + { { UINT32_C(1049754994), UINT32_C(357707956) }, + { SIMDE_FLOAT32_C(131219374.250000), SIMDE_FLOAT32_C(44713494.500000) }, + { SIMDE_FLOAT32_C(1025151.375), SIMDE_FLOAT32_C(349324.1875) }, + { SIMDE_FLOAT32_C(16017.99), SIMDE_FLOAT32_C(5458.19) }, + { SIMDE_FLOAT32_C(125.14), SIMDE_FLOAT32_C(42.642) }, + { SIMDE_FLOAT32_C(0.240000), SIMDE_FLOAT32_C(0.080000) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); + simde_float32x2_t r3 = simde_vcvt_n_f32_u32(a, 3); + simde_float32x2_t r10 = simde_vcvt_n_f32_u32(a, 10); + simde_float32x2_t r16 = simde_vcvt_n_f32_u32(a, 16); + simde_float32x2_t r23 = simde_vcvt_n_f32_u32(a, 23); + simde_float32x2_t r32 = simde_vcvt_n_f32_u32(a, 32); + + simde_test_arm_neon_assert_equal_f32x2(r3, simde_vld1_f32(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f32x2(r10, simde_vld1_f32(test_vec[i].r10), 1); + simde_test_arm_neon_assert_equal_f32x2(r16, simde_vld1_f32(test_vec[i].r16), 1); + simde_test_arm_neon_assert_equal_f32x2(r23, simde_vld1_f32(test_vec[i].r23), 1); + simde_test_arm_neon_assert_equal_f32x2(r32, simde_vld1_f32(test_vec[i].r32), 1); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_s64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_f16_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_f16_u16) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_f32_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_f32_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_f64_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_f64_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_f16_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_f16_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_f32_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_f32_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_f64_s64) +//SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_f64_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_s64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_n_u64_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/cvtn.c b/test/arm/neon/cvtn.c index d76dbfd0e..7a94cd469 100644 --- a/test/arm/neon/cvtn.c +++ b/test/arm/neon/cvtn.c @@ -93,6 +93,293 @@ test_simde_vcvtnq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcvtnh_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MAX)), + INT64_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MIN)), + INT64_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 12.44), + INT64_C( 12) }, + { SIMDE_FLOAT16_VALUE( 30.46), + INT64_C( 30) }, + { SIMDE_FLOAT16_VALUE( 16.51), + INT64_C( 17) }, + { SIMDE_FLOAT16_VALUE( 74.89), + INT64_C( 75) }, + { SIMDE_FLOAT16_VALUE( -24.05), + -INT64_C( 24) }, + { SIMDE_FLOAT16_VALUE( -7.75), + -INT64_C( 8) }, + { SIMDE_FLOAT16_VALUE( -57.31), + -INT64_C( 57) }, + { SIMDE_FLOAT16_VALUE( -14.65), + -INT64_C( 15) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int64_t r = simde_vcvtnh_s64_f16(a); + 
simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvtnh_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)), + INT32_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 12.44), + INT32_C( 12) }, + { SIMDE_FLOAT16_VALUE( 30.46), + INT32_C( 30) }, + { SIMDE_FLOAT16_VALUE( 16.51), + INT32_C( 17) }, + { SIMDE_FLOAT16_VALUE( 74.89), + INT32_C( 75) }, + { SIMDE_FLOAT16_VALUE( -24.05), + -INT32_C( 24) }, + { SIMDE_FLOAT16_VALUE( -7.75), + -INT32_C( 8) }, + { SIMDE_FLOAT16_VALUE( -57.31), + -INT32_C( 57) }, + { SIMDE_FLOAT16_VALUE( -14.65), + -INT32_C( 15) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int32_t r = simde_vcvtnh_s32_f16(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvtnh_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + int16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MAX)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MIN)), + INT16_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 12.44), + INT16_C( 12) }, + { SIMDE_FLOAT16_VALUE( 30.46), + INT16_C( 30) }, + { SIMDE_FLOAT16_VALUE( 16.51), + INT16_C( 17) }, + { SIMDE_FLOAT16_VALUE( 74.89), + INT16_C( 75) }, + { SIMDE_FLOAT16_VALUE( -24.05), + -INT16_C( 24) }, + { SIMDE_FLOAT16_VALUE( -7.75), + -INT16_C( 8) }, + { SIMDE_FLOAT16_VALUE( -57.31), + -INT16_C( 57) }, + { SIMDE_FLOAT16_VALUE( -14.65), + -INT16_C( 15) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
+ simde_float16 a = test_vec[i].a; + int16_t r = simde_vcvtnh_s16_f16(a); + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvtns_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + INT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) + SIMDE_FLOAT32_C(-1000.0), + INT32_MIN }, + #endif + { SIMDE_FLOAT32_C( 192.44), + INT32_C( 192) }, + { SIMDE_FLOAT32_C( 350.46), + INT32_C( 350) }, + { SIMDE_FLOAT32_C( 163.51), + INT32_C( 164) }, + { SIMDE_FLOAT32_C( 974.89), + INT32_C( 975) }, + { SIMDE_FLOAT32_C( -254.05), + -INT32_C( 254) }, + { SIMDE_FLOAT32_C( -707.75), + -INT32_C( 708) }, + { SIMDE_FLOAT32_C( -957.31), + -INT32_C( 957) }, + { SIMDE_FLOAT32_C( -144.65), + -INT32_C( 145) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32 a = test_vec[i].a; + int32_t r = simde_vcvtns_s32_f32(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvtnh_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + UINT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX)), + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE( -192.44), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 192.44), + UINT64_C( 192) }, + { SIMDE_FLOAT16_VALUE( 350.46), + UINT64_C( 350) }, + { SIMDE_FLOAT16_VALUE( 163.51), + UINT64_C( 164) }, + { SIMDE_FLOAT16_VALUE( 974.89), + UINT64_C( 975) }, + { SIMDE_FLOAT16_VALUE( 254.05), + UINT64_C( 254) }, + { SIMDE_FLOAT16_VALUE( 707.75), + UINT64_C( 708) }, + { SIMDE_FLOAT16_VALUE( 57.31), + UINT64_C( 57) }, + { SIMDE_FLOAT16_VALUE( 144.65), + UINT64_C( 145) }, + }; + + for (size_t i = 0 
; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint64_t r = simde_vcvtnh_u64_f16(a); + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvtnh_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( -192.44), + UINT32_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 192.44), + UINT32_C( 192) }, + { SIMDE_FLOAT16_VALUE( 350.46), + UINT32_C( 350) }, + { SIMDE_FLOAT16_VALUE( 163.51), + UINT32_C( 164) }, + { SIMDE_FLOAT16_VALUE( 974.89), + UINT32_C( 975) }, + { SIMDE_FLOAT16_VALUE( 254.05), + UINT32_C( 254) }, + { SIMDE_FLOAT16_VALUE( 707.75), + UINT32_C( 708) }, + { SIMDE_FLOAT16_VALUE( 57.31), + UINT32_C( 57) }, + { SIMDE_FLOAT16_VALUE( 144.65), + UINT32_C( 145) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint32_t r = simde_vcvtnh_u32_f16(a); + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvtnh_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + uint16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX)), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -192.44), + UINT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 192.44), + UINT16_C( 192) }, + { SIMDE_FLOAT16_VALUE( 350.46), + UINT16_C( 350) }, + { SIMDE_FLOAT16_VALUE( 163.51), + UINT16_C( 164) }, + { SIMDE_FLOAT16_VALUE( 974.89), + UINT16_C( 975) }, + { SIMDE_FLOAT16_VALUE( 254.05), + UINT16_C( 254) }, + { SIMDE_FLOAT16_VALUE( 707.75), + UINT16_C( 708) }, + { SIMDE_FLOAT16_VALUE( 57.31), + UINT16_C( 57) }, + { SIMDE_FLOAT16_VALUE( 144.65), + 
UINT16_C( 145) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint16_t r = simde_vcvtnh_u16_f16(a); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; +} + static int test_simde_vcvtns_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -196,6 +483,47 @@ test_simde_vcvtnq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcvtnd_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + INT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX) + SIMDE_FLOAT64_C(10000.0), + INT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN) + SIMDE_FLOAT64_C(-10000.0), + INT64_MIN }, + #endif + { SIMDE_FLOAT64_C( 755.50), + INT64_C( 756) }, + { SIMDE_FLOAT64_C( 719.56), + INT64_C( 720) }, + { SIMDE_FLOAT64_C( 939.13), + INT64_C( 939) }, + { SIMDE_FLOAT64_C( 742.14), + INT64_C( 742) }, + { SIMDE_FLOAT64_C( -916.64), + -INT64_C( 917) }, + { SIMDE_FLOAT64_C( -55.73), + -INT64_C( 56) }, + { SIMDE_FLOAT64_C( -511.47), + -INT64_C( 511) }, + { SIMDE_FLOAT64_C( -399.92), + -INT64_C( 400) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + int64_t r = simde_vcvtnd_s64_f64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +} + static int test_simde_vcvtnd_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -300,12 +628,307 @@ test_simde_vcvtnq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcvtnq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + int16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(4.2), SIMDE_FLOAT16_VALUE(11.5), SIMDE_FLOAT16_VALUE(10.2) }, + { INT16_C(8), -INT16_C(2), INT16_C(6), INT16_C(3), 
-INT16_C(9), INT16_C(4), INT16_C(12), INT16_C(10) } }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(8.9), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(-12.3) }, + { INT16_C(13), -INT16_C(13), -INT16_C(3), INT16_C(9), INT16_C(1), INT16_C(7), -INT16_C(3), -INT16_C(12) } }, + { { SIMDE_FLOAT16_VALUE(-12.0), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(3.1) }, + { -INT16_C(12), INT16_C(9), INT16_C(9), -INT16_C(2), -INT16_C(1), INT16_C(2), -INT16_C(5), INT16_C(3) } }, + { { SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-12.8), SIMDE_FLOAT16_VALUE(13.9), SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(-14.8) }, + { INT16_C(8), -INT16_C(14), INT16_C(9), INT16_C(1), -INT16_C(13), INT16_C(14), INT16_C(4), -INT16_C(15) } }, + { { SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(14.7), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(3.4), SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(0.4) }, + { -INT16_C(15), INT16_C(15), INT16_C(10), INT16_C(3), INT16_C(5), INT16_C(9), INT16_C(2), INT16_C(0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_int16x8_t r = simde_vcvtnq_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtn_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + int16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(3.3) }, + { INT16_C(8), -INT16_C(2), INT16_C(6), INT16_C(3) } }, + { { 
SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(4.2), SIMDE_FLOAT16_VALUE(11.5), SIMDE_FLOAT16_VALUE(10.2) }, + { -INT16_C(9), INT16_C(4), INT16_C(12), INT16_C(10) } }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(8.9) }, + { INT16_C(13), -INT16_C(13), -INT16_C(3), INT16_C(9) } }, + { { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(-12.3) }, + { INT16_C(1), INT16_C(7), -INT16_C(3), -INT16_C(12) } }, + { { SIMDE_FLOAT16_VALUE(-12.0), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-2.3) }, + { -INT16_C(12), INT16_C(9), INT16_C(9), -INT16_C(2) } }, + { { SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(3.1) }, + { -INT16_C(1), INT16_C(2), -INT16_C(5), INT16_C(3) } }, + { { SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(1.3) }, + { INT16_C(8), -INT16_C(14), INT16_C(9), INT16_C(1) } }, + { { SIMDE_FLOAT16_VALUE(-12.8), SIMDE_FLOAT16_VALUE(13.9), SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(-14.8) }, + { -INT16_C(13), INT16_C(14), INT16_C(4), -INT16_C(15) } }, + { { SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(14.7), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(3.4) }, + { -INT16_C(15), INT16_C(15), INT16_C(10), INT16_C(3) } }, + { { SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(0.4) }, + { INT16_C(5), INT16_C(9), INT16_C(2), INT16_C(0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_int16x4_t r = simde_vcvtn_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtnq_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { 
SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(11.3), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(13.6), SIMDE_FLOAT16_VALUE(11.1) }, + { UINT16_C(5), UINT16_C(0), UINT16_C(13), UINT16_C(11), UINT16_C(14), UINT16_C(10), UINT16_C(14), UINT16_C(11) } }, + { { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(4.6), SIMDE_FLOAT16_VALUE(11.9), SIMDE_FLOAT16_VALUE(5.0) }, + { UINT16_C(9), UINT16_C(8), UINT16_C(0), UINT16_C(0), UINT16_C(0), UINT16_C(5), UINT16_C(12), UINT16_C(5) } }, + { { SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(2.7), SIMDE_FLOAT16_VALUE(10.9), SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(13.4), SIMDE_FLOAT16_VALUE(13.1) }, + { UINT16_C(2), UINT16_C(3), UINT16_C(11), UINT16_C(4), UINT16_C(9), UINT16_C(6), UINT16_C(13), UINT16_C(13) } }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(1.9) }, + { UINT16_C(13), UINT16_C(6), UINT16_C(6), UINT16_C(1), UINT16_C(2), UINT16_C(2), UINT16_C(7), UINT16_C(2) } }, + { { SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(4.0) }, + { UINT16_C(5), UINT16_C(0), UINT16_C(5), UINT16_C(7), UINT16_C(10), UINT16_C(10), UINT16_C(15), UINT16_C(4) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcvtnq_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtn_u16_f16 
(SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(11.3) }, + { UINT16_C(5), UINT16_C(0), UINT16_C(13), UINT16_C(11) } }, + { { SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(13.6), SIMDE_FLOAT16_VALUE(11.1) }, + { UINT16_C(14), UINT16_C(10), UINT16_C(14), UINT16_C(11) } }, + { { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.2) }, + { UINT16_C(9), UINT16_C(8), UINT16_C(0), UINT16_C(0) } }, + { { SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(4.6), SIMDE_FLOAT16_VALUE(11.9), SIMDE_FLOAT16_VALUE(5.0) }, + { UINT16_C(0), UINT16_C(5), UINT16_C(12), UINT16_C(5) } }, + { { SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(2.7), SIMDE_FLOAT16_VALUE(10.9), SIMDE_FLOAT16_VALUE(4.1) }, + { UINT16_C(2), UINT16_C(3), UINT16_C(11), UINT16_C(4) } }, + { { SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(13.4), SIMDE_FLOAT16_VALUE(13.1) }, + { UINT16_C(9), UINT16_C(6), UINT16_C(13), UINT16_C(13) } }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(1.0) }, + { UINT16_C(13), UINT16_C(6), UINT16_C(6), UINT16_C(1) } }, + { { SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(1.9) }, + { UINT16_C(2), UINT16_C(2), UINT16_C(7), UINT16_C(2) } }, + { { SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(7.2) }, + { UINT16_C(5), UINT16_C(0), UINT16_C(5), UINT16_C(7) } }, + { { SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(4.0) }, + { UINT16_C(10), UINT16_C(10), UINT16_C(15), UINT16_C(4) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t 
r = simde_vcvtn_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtn_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float32 a[2]; + uint32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(31.72), SIMDE_FLOAT32_C(82.77) }, + { UINT32_C(32), UINT32_C(83) } }, + { { SIMDE_FLOAT32_C(17.70), SIMDE_FLOAT32_C(39.19) }, + { UINT32_C(18), UINT32_C(39) } }, + { { SIMDE_FLOAT32_C(86.63), SIMDE_FLOAT32_C(69.32) }, + { UINT32_C(87), UINT32_C(69) } }, + { { SIMDE_FLOAT32_C(9.24), SIMDE_FLOAT32_C(42.24) }, + { UINT32_C(9), UINT32_C(42) } }, + { { SIMDE_FLOAT32_C(5.52), SIMDE_FLOAT32_C(32.96) }, + { UINT32_C(6), UINT32_C(33) } }, + { { SIMDE_FLOAT32_C(59.77), SIMDE_FLOAT32_C(16.79) }, + { UINT32_C(60), UINT32_C(17) } }, + { { SIMDE_FLOAT32_C(20.26), SIMDE_FLOAT32_C(74.48) }, + { UINT32_C(20), UINT32_C(74) } }, + { { SIMDE_FLOAT32_C(76.43), SIMDE_FLOAT32_C(66.05) }, + { UINT32_C(76), UINT32_C(66) } }, + { { SIMDE_FLOAT32_C(78.67), SIMDE_FLOAT32_C(64.95) }, + { UINT32_C(79), UINT32_C(65) } }, + { { SIMDE_FLOAT32_C(96.96), SIMDE_FLOAT32_C(24.41) }, + { UINT32_C(97), UINT32_C(24) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_uint32x2_t r = simde_vcvtn_u32_f32(a); + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtn_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float32 a[2]; + int32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(25.79), SIMDE_FLOAT32_C(49.90) }, + { INT32_C(26), INT32_C(50) } }, + { { SIMDE_FLOAT32_C(-94.73), SIMDE_FLOAT32_C(-49.75) }, + { -INT32_C(95), -INT32_C(50) } }, + { { SIMDE_FLOAT32_C(56.53), SIMDE_FLOAT32_C(-28.89) }, + { INT32_C(57), -INT32_C(29) } }, + { { SIMDE_FLOAT32_C(-2.63), SIMDE_FLOAT32_C(54.36) }, + { -INT32_C(3), INT32_C(54) } }, + { { SIMDE_FLOAT32_C(60.53), 
SIMDE_FLOAT32_C(15.86) }, + { INT32_C(61), INT32_C(16) } }, + { { SIMDE_FLOAT32_C(-83.28), SIMDE_FLOAT32_C(-5.22) }, + { -INT32_C(83), -INT32_C(5) } }, + { { SIMDE_FLOAT32_C(44.28), SIMDE_FLOAT32_C(-14.52) }, + { INT32_C(44), -INT32_C(15) } }, + { { SIMDE_FLOAT32_C(-15.99), SIMDE_FLOAT32_C(80.84) }, + { -INT32_C(16), INT32_C(81) } }, + { { SIMDE_FLOAT32_C(14.60), SIMDE_FLOAT32_C(12.56) }, + { INT32_C(15), INT32_C(13) } }, + { { SIMDE_FLOAT32_C(9.86), SIMDE_FLOAT32_C(3.40) }, + { INT32_C(10), INT32_C(3) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_int32x2_t r = simde_vcvtn_s32_f32(a); + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtn_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 a[1]; + int64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(9081.35) }, + { INT64_C(9081) } }, + { { SIMDE_FLOAT64_C(6727.25) }, + { INT64_C(6727) } }, + { { SIMDE_FLOAT64_C(-6846.21) }, + { -INT64_C(6846) } }, + { { SIMDE_FLOAT64_C(199.59) }, + { INT64_C(200) } }, + { { SIMDE_FLOAT64_C(8615.31) }, + { INT64_C(8615) } }, + { { SIMDE_FLOAT64_C(-4053.97) }, + { -INT64_C(4054) } }, + { { SIMDE_FLOAT64_C(-7297.52) }, + { -INT64_C(7298) } }, + { { SIMDE_FLOAT64_C(6497.22) }, + { INT64_C(6497) } }, + { { SIMDE_FLOAT64_C(7123.83) }, + { INT64_C(7124) } }, + { { SIMDE_FLOAT64_C(2924.27) }, + { INT64_C(2924) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_int64x1_t r = simde_vcvtn_s64_f64(a); + simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vcvtn_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 a[1]; + uint64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(8228.53) }, + { UINT64_C(8229) } }, + { { 
SIMDE_FLOAT64_C(2885.15) }, + { UINT64_C(2885) } }, + { { SIMDE_FLOAT64_C(5510.71) }, + { UINT64_C(5511) } }, + { { SIMDE_FLOAT64_C(6587.73) }, + { UINT64_C(6588) } }, + { { SIMDE_FLOAT64_C(2334.53) }, + { UINT64_C(2335) } }, + { { SIMDE_FLOAT64_C(8071.59) }, + { UINT64_C(8072) } }, + { { SIMDE_FLOAT64_C(1644.38) }, + { UINT64_C(1644) } }, + { { SIMDE_FLOAT64_C(9170.38) }, + { UINT64_C(9170) } }, + { { SIMDE_FLOAT64_C(1001.61) }, + { UINT64_C(1002) } }, + { { SIMDE_FLOAT64_C(2770.35) }, + { UINT64_C(2770) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_uint64x1_t r = simde_vcvtn_u64_f64(a); + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; +} + SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtns_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnd_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtns_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnd_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnh_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnh_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnh_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnh_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnh_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnh_u64_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtn_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtn_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtn_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtn_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtn_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtn_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnq_s16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnq_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnq_s64_f64) -SIMDE_TEST_FUNC_LIST_ENTRY(vcvtns_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnq_u16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnq_u32_f32) -SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnd_u64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtnq_u64_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/dup_lane.c b/test/arm/neon/dup_lane.c index d6b3c03d4..214a91d57 
100644 --- a/test/arm/neon/dup_lane.c +++ b/test/arm/neon/dup_lane.c @@ -7,6 +7,105 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +static int +test_simde_vdup_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 vec[4]; + int lane; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-7.6), SIMDE_FLOAT16_VALUE(11.4), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(-0.1) }, + INT8_C(2), + { SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7) } }, + { { SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(3.7), SIMDE_FLOAT16_VALUE(-12.5) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(1.4) } }, + { { SIMDE_FLOAT16_VALUE(13.3), SIMDE_FLOAT16_VALUE(-11.6), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(-1.3) }, + INT8_C(2), + { SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.4) } }, + { { SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-10.6), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(-5.6) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-11.5) } }, + { { SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.4), SIMDE_FLOAT16_VALUE(11.3), SIMDE_FLOAT16_VALUE(11.8) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.6) } }, + { { SIMDE_FLOAT16_VALUE(1.7), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-11.1), SIMDE_FLOAT16_VALUE(-11.3) }, + INT8_C(1), + { SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-12.1) } }, + { { SIMDE_FLOAT16_VALUE(-2.9), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(11.1), SIMDE_FLOAT16_VALUE(8.8) }, + INT8_C(1), + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.3), 
SIMDE_FLOAT16_VALUE(0.3) } }, + { { SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(5.8) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(8.0) } }, + { { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(10.1), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(-11.7) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(1.1) } }, + { { SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-14.0), SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-10.2) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-2.0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t vec; + simde_float16x4_t r; + + vec = simde_vld1_f16(test_vec[i].vec); + SIMDE_CONSTIFY_4_(simde_vdup_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, vec); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vdupq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 vec[4]; + int lane; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(-7.4), SIMDE_FLOAT16_VALUE(0.5) }, + INT8_C(0), + { SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), + SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4) } }, + { { SIMDE_FLOAT16_VALUE(8.1), SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(-5.5) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), + SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), 
SIMDE_FLOAT16_VALUE(-5.5) } }, + { { SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(6.7), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(10.7) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), + SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7) } }, + { { SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(14.8), SIMDE_FLOAT16_VALUE(-2.2) }, + INT8_C(1), + { SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), + SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0) } }, + { { SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(-14.7) }, + INT8_C(2), + { SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), + SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5) } }, + { { SIMDE_FLOAT16_VALUE(-6.0), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(-11.3) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), + SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t vec; + simde_float16x8_t r; + + vec = simde_vld1_f16(test_vec[i].vec); + SIMDE_CONSTIFY_4_(simde_vdupq_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, vec); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +} + + static int test_simde_vdup_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -2506,6 +2605,7 @@ test_simde_vdupq_laneq_u64 
(SIMDE_MUNIT_TEST_ARGS) { HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_s8) @@ -2528,6 +2628,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_lane_s8) diff --git a/test/arm/neon/ext.c b/test/arm/neon/ext.c index b5f1d8f80..6ea302d91 100644 --- a/test/arm/neon/ext.c +++ b/test/arm/neon/ext.c @@ -6,6 +6,69 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +static int +test_simde_vext_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[4]; + int n; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-11.7), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(-6.9) }, + { SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(7.6) }, + INT32_C(3), + { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(-12.9) } }, + { { SIMDE_FLOAT16_VALUE(-11.0), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-9.1) }, + { SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(5.8) }, + INT32_C(0), + { SIMDE_FLOAT16_VALUE(-11.0), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-9.1) } }, + { { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-14.8), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(-10.6) }, + { SIMDE_FLOAT16_VALUE(11.1), SIMDE_FLOAT16_VALUE(12.9), SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-7.8) }, + INT32_C(0), + { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-14.8), SIMDE_FLOAT16_VALUE(13.5), 
SIMDE_FLOAT16_VALUE(-10.6) } }, + { { SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(11.0) }, + { SIMDE_FLOAT16_VALUE(11.2), SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(-0.5) }, + INT32_C(2), + { SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(11.0), SIMDE_FLOAT16_VALUE(11.2), SIMDE_FLOAT16_VALUE(8.6) } }, + { { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-7.3), SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-5.3) }, + { SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(11.4), SIMDE_FLOAT16_VALUE(2.7) }, + INT32_C(2), + { SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-6.5) } }, + { { SIMDE_FLOAT16_VALUE(-8.3), SIMDE_FLOAT16_VALUE(-14.3), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(-0.7) }, + { SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(14.2), SIMDE_FLOAT16_VALUE(-14.9), SIMDE_FLOAT16_VALUE(8.1) }, + INT32_C(1), + { SIMDE_FLOAT16_VALUE(-14.3), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(-1.5) } }, + { { SIMDE_FLOAT16_VALUE(15.0), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(11.7), SIMDE_FLOAT16_VALUE(-13.5) }, + { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(-4.4), SIMDE_FLOAT16_VALUE(-9.7), SIMDE_FLOAT16_VALUE(6.9) }, + INT32_C(0), + { SIMDE_FLOAT16_VALUE(15.0), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(11.7), SIMDE_FLOAT16_VALUE(-13.5) } }, + { { SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(9.5), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-3.7) }, + { SIMDE_FLOAT16_VALUE(8.1), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(8.3) }, + INT32_C(1), + { SIMDE_FLOAT16_VALUE(9.5), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-3.7), SIMDE_FLOAT16_VALUE(8.1) } }, + { { SIMDE_FLOAT16_VALUE(-0.4), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(0.4) }, + { SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-0.0), 
SIMDE_FLOAT16_VALUE(-0.1), SIMDE_FLOAT16_VALUE(2.6) }, + INT32_C(3), + { SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(-0.1) } }, + { { SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-9.2) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(14.3), SIMDE_FLOAT16_VALUE(-3.8), SIMDE_FLOAT16_VALUE(-12.6) }, + INT32_C(1), + { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-9.2), SIMDE_FLOAT16_VALUE(2.4) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + int n = test_vec[i].n; + simde_float16x4_t r; + SIMDE_CONSTIFY_4_(simde_vext_f16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vext_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1626,6 +1689,7 @@ test_simde_vextq_u64 (SIMDE_MUNIT_TEST_ARGS) { HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vext_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vext_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vext_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vext_s8) diff --git a/test/arm/neon/fma.c b/test/arm/neon/fma.c index 128212d15..9513e5c9e 100644 --- a/test/arm/neon/fma.c +++ b/test/arm/neon/fma.c @@ -73,6 +73,125 @@ test_simde_vfma_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vfmah_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + simde_float16 b; + simde_float16 c; + simde_float16 r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(31.59), + SIMDE_FLOAT16_VALUE(4.50), + SIMDE_FLOAT16_VALUE(-3.60), + SIMDE_FLOAT16_VALUE(15.39) }, + { SIMDE_FLOAT16_VALUE(18.30), + SIMDE_FLOAT16_VALUE(1.00), + SIMDE_FLOAT16_VALUE(0.40), + SIMDE_FLOAT16_VALUE(18.70) }, + { SIMDE_FLOAT16_VALUE(-27.00), + SIMDE_FLOAT16_VALUE(-4.80), + 
SIMDE_FLOAT16_VALUE(1.70), + SIMDE_FLOAT16_VALUE(-35.16) }, + { SIMDE_FLOAT16_VALUE(-24.30), + SIMDE_FLOAT16_VALUE(1.90), + SIMDE_FLOAT16_VALUE(-2.40), + SIMDE_FLOAT16_VALUE(-28.86) }, + { SIMDE_FLOAT16_VALUE(32.19), + SIMDE_FLOAT16_VALUE(1.10), + SIMDE_FLOAT16_VALUE(0.80), + SIMDE_FLOAT16_VALUE(33.06) }, + { SIMDE_FLOAT16_VALUE(-16.59), + SIMDE_FLOAT16_VALUE(-0.20), + SIMDE_FLOAT16_VALUE(1.60), + SIMDE_FLOAT16_VALUE(-16.91) }, + { SIMDE_FLOAT16_VALUE(34.59), + SIMDE_FLOAT16_VALUE(1.60), + SIMDE_FLOAT16_VALUE(-0.40), + SIMDE_FLOAT16_VALUE(33.97) }, + { SIMDE_FLOAT16_VALUE(18.09), + SIMDE_FLOAT16_VALUE(5.00), + SIMDE_FLOAT16_VALUE(2.80), + SIMDE_FLOAT16_VALUE(32.09) }, + { SIMDE_FLOAT16_VALUE(21.30), + SIMDE_FLOAT16_VALUE(-2.50), + SIMDE_FLOAT16_VALUE(-0.10), + SIMDE_FLOAT16_VALUE(21.55) }, + { SIMDE_FLOAT16_VALUE(22.20), + SIMDE_FLOAT16_VALUE(-3.10), + SIMDE_FLOAT16_VALUE(-4.60), + SIMDE_FLOAT16_VALUE(36.47) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t r = simde_vfmah_f16(test_vec[i].a, test_vec[i].b, test_vec[i].c); + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +} + +static int +test_simde_vfma_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 c[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(31.59), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(25.00) }, + { SIMDE_FLOAT16_VALUE(4.50), SIMDE_FLOAT16_VALUE(1.00), SIMDE_FLOAT16_VALUE(3.00), SIMDE_FLOAT16_VALUE(-2.20) } , + { SIMDE_FLOAT16_VALUE(-3.60), SIMDE_FLOAT16_VALUE(-1.80), SIMDE_FLOAT16_VALUE(4.00), SIMDE_FLOAT16_VALUE(-4.70) } , + { SIMDE_FLOAT16_VALUE(15.39), SIMDE_FLOAT16_VALUE(3.00), SIMDE_FLOAT16_VALUE(16.80), SIMDE_FLOAT16_VALUE(35.34) } }, + { { SIMDE_FLOAT16_VALUE(18.30), SIMDE_FLOAT16_VALUE(-39.00), SIMDE_FLOAT16_VALUE(-47.69), SIMDE_FLOAT16_VALUE(-43.50) } , + { SIMDE_FLOAT16_VALUE(1.00), SIMDE_FLOAT16_VALUE(-2.00), 
SIMDE_FLOAT16_VALUE(-2.30), SIMDE_FLOAT16_VALUE(-2.80) }, + { SIMDE_FLOAT16_VALUE(0.40), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(-0.30), SIMDE_FLOAT16_VALUE(-4.50) }, + { SIMDE_FLOAT16_VALUE(18.70), SIMDE_FLOAT16_VALUE(-48.81), SIMDE_FLOAT16_VALUE(-47.00), SIMDE_FLOAT16_VALUE(-30.91) } }, + { { SIMDE_FLOAT16_VALUE(-27.00), SIMDE_FLOAT16_VALUE(-35.59), SIMDE_FLOAT16_VALUE(-37.59), SIMDE_FLOAT16_VALUE(31.50) }, + { SIMDE_FLOAT16_VALUE(-4.80), SIMDE_FLOAT16_VALUE(-4.20), SIMDE_FLOAT16_VALUE(3.10), SIMDE_FLOAT16_VALUE(-3.10) }, + { SIMDE_FLOAT16_VALUE(1.70), SIMDE_FLOAT16_VALUE(-4.70), SIMDE_FLOAT16_VALUE(2.20), SIMDE_FLOAT16_VALUE(3.50) }, + { SIMDE_FLOAT16_VALUE(-35.16), SIMDE_FLOAT16_VALUE(-15.86), SIMDE_FLOAT16_VALUE(-30.78), SIMDE_FLOAT16_VALUE(20.66) } }, + { { SIMDE_FLOAT16_VALUE(-24.30), SIMDE_FLOAT16_VALUE(-8.70), SIMDE_FLOAT16_VALUE(-2.70), SIMDE_FLOAT16_VALUE(-7.60) }, + { SIMDE_FLOAT16_VALUE(1.90), SIMDE_FLOAT16_VALUE(0.40), SIMDE_FLOAT16_VALUE(1.80), SIMDE_FLOAT16_VALUE(1.60) }, + { SIMDE_FLOAT16_VALUE(-2.40), SIMDE_FLOAT16_VALUE(-0.10), SIMDE_FLOAT16_VALUE(-1.10), SIMDE_FLOAT16_VALUE(-2.70) }, + { SIMDE_FLOAT16_VALUE(-28.86), SIMDE_FLOAT16_VALUE(-8.74), SIMDE_FLOAT16_VALUE(-4.68), SIMDE_FLOAT16_VALUE(-11.92) } }, + { { SIMDE_FLOAT16_VALUE(32.19), SIMDE_FLOAT16_VALUE(0.70), SIMDE_FLOAT16_VALUE(10.60), SIMDE_FLOAT16_VALUE(47.50) }, + { SIMDE_FLOAT16_VALUE(1.10), SIMDE_FLOAT16_VALUE(2.90), SIMDE_FLOAT16_VALUE(-2.80), SIMDE_FLOAT16_VALUE(5.00) }, + { SIMDE_FLOAT16_VALUE(0.80), SIMDE_FLOAT16_VALUE(-4.30), SIMDE_FLOAT16_VALUE(-3.30), SIMDE_FLOAT16_VALUE(1.40) }, + { SIMDE_FLOAT16_VALUE(33.06), SIMDE_FLOAT16_VALUE(-11.77), SIMDE_FLOAT16_VALUE(19.84), SIMDE_FLOAT16_VALUE(54.50) } }, + { { SIMDE_FLOAT16_VALUE(-16.59), SIMDE_FLOAT16_VALUE(4.30), SIMDE_FLOAT16_VALUE(45.59), SIMDE_FLOAT16_VALUE(-28.00) }, + { SIMDE_FLOAT16_VALUE(-0.20), SIMDE_FLOAT16_VALUE(2.60), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(4.80) }, + { SIMDE_FLOAT16_VALUE(1.60), 
SIMDE_FLOAT16_VALUE(-4.90), SIMDE_FLOAT16_VALUE(-4.10), SIMDE_FLOAT16_VALUE(-2.80) }, + { SIMDE_FLOAT16_VALUE(-16.91), SIMDE_FLOAT16_VALUE(-8.44), SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-41.44) } }, + { { SIMDE_FLOAT16_VALUE(34.59), SIMDE_FLOAT16_VALUE(47.00), SIMDE_FLOAT16_VALUE(-23.30), SIMDE_FLOAT16_VALUE(-23.59) }, + { SIMDE_FLOAT16_VALUE(1.60), SIMDE_FLOAT16_VALUE(-4.90), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(-2.60) }, + { SIMDE_FLOAT16_VALUE(-0.40), SIMDE_FLOAT16_VALUE(-4.50), SIMDE_FLOAT16_VALUE(-4.10), SIMDE_FLOAT16_VALUE(0.30) }, + { SIMDE_FLOAT16_VALUE(33.97), SIMDE_FLOAT16_VALUE(69.06), SIMDE_FLOAT16_VALUE(-36.84), SIMDE_FLOAT16_VALUE(-24.38) } }, + { { SIMDE_FLOAT16_VALUE(18.09), SIMDE_FLOAT16_VALUE(2.60), SIMDE_FLOAT16_VALUE(-42.81), SIMDE_FLOAT16_VALUE(34.59) }, + { SIMDE_FLOAT16_VALUE(5.00), SIMDE_FLOAT16_VALUE(-3.50), SIMDE_FLOAT16_VALUE(1.90), SIMDE_FLOAT16_VALUE(-0.20) }, + { SIMDE_FLOAT16_VALUE(2.80), SIMDE_FLOAT16_VALUE(-4.00), SIMDE_FLOAT16_VALUE(-3.50), SIMDE_FLOAT16_VALUE(3.00) }, + { SIMDE_FLOAT16_VALUE(32.09), SIMDE_FLOAT16_VALUE(16.59), SIMDE_FLOAT16_VALUE(-49.47), SIMDE_FLOAT16_VALUE(34.00) } }, + { { SIMDE_FLOAT16_VALUE(21.30), SIMDE_FLOAT16_VALUE(31.59), SIMDE_FLOAT16_VALUE(7.20), SIMDE_FLOAT16_VALUE(45.00) }, + { SIMDE_FLOAT16_VALUE(-2.50), SIMDE_FLOAT16_VALUE(3.90), SIMDE_FLOAT16_VALUE(-1.70), SIMDE_FLOAT16_VALUE(-0.50) }, + { SIMDE_FLOAT16_VALUE(-0.10), SIMDE_FLOAT16_VALUE(-4.20), SIMDE_FLOAT16_VALUE(-4.10), SIMDE_FLOAT16_VALUE(3.80) }, + { SIMDE_FLOAT16_VALUE(21.55), SIMDE_FLOAT16_VALUE(15.22), SIMDE_FLOAT16_VALUE(14.17), SIMDE_FLOAT16_VALUE(43.09) } }, + { { SIMDE_FLOAT16_VALUE(22.20), SIMDE_FLOAT16_VALUE(-18.09), SIMDE_FLOAT16_VALUE(12.90), SIMDE_FLOAT16_VALUE(-21.20) }, + { SIMDE_FLOAT16_VALUE(-3.10), SIMDE_FLOAT16_VALUE(3.20), SIMDE_FLOAT16_VALUE(0.30), SIMDE_FLOAT16_VALUE(-4.20) }, + { SIMDE_FLOAT16_VALUE(-4.60), SIMDE_FLOAT16_VALUE(-3.00), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(3.70) }, + { 
SIMDE_FLOAT16_VALUE(36.47), SIMDE_FLOAT16_VALUE(-27.69), SIMDE_FLOAT16_VALUE(14.37), SIMDE_FLOAT16_VALUE(-36.75) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t c = simde_vld1_f16(test_vec[i].c); + simde_float16x4_t r = simde_vfma_f16(a, b, c); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vfmaq_f16 (SIMDE_MUNIT_TEST_ARGS) { struct { @@ -343,10 +462,12 @@ test_simde_vfmaq_f64 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vfmah_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfma_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfma_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_f32) -SIMDE_TEST_FUNC_LIST_ENTRY(vfma_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/fma_lane.c b/test/arm/neon/fma_lane.c index d0c67798d..96de9de83 100644 --- a/test/arm/neon/fma_lane.c +++ b/test/arm/neon/fma_lane.c @@ -149,6 +149,170 @@ test_simde_vfmad_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vfmah_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t b; + simde_float16_t v[4]; + simde_float16_t r0; + simde_float16_t r1; + simde_float16_t r2; + simde_float16_t r3; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(-5.9), + SIMDE_FLOAT16_VALUE(3.4), + { SIMDE_FLOAT16_VALUE(7.1), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(0.9) }, + SIMDE_FLOAT16_VALUE(18.240), + SIMDE_FLOAT16_VALUE(-54.180), + SIMDE_FLOAT16_VALUE(8.040), + SIMDE_FLOAT16_VALUE(-2.840) }, + { SIMDE_FLOAT16_VALUE(-4.6), + SIMDE_FLOAT16_VALUE(-6.0), + { SIMDE_FLOAT16_VALUE(-12.0), SIMDE_FLOAT16_VALUE(-4.1), SIMDE_FLOAT16_VALUE(11.3), SIMDE_FLOAT16_VALUE(-1.1) }, + 
SIMDE_FLOAT16_VALUE(67.400), + SIMDE_FLOAT16_VALUE(20.000), + SIMDE_FLOAT16_VALUE(-72.400), + SIMDE_FLOAT16_VALUE(2.000) }, + { SIMDE_FLOAT16_VALUE(-8.2), + SIMDE_FLOAT16_VALUE(-6.9), + { SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(-5.2), SIMDE_FLOAT16_VALUE(-0.2) }, + SIMDE_FLOAT16_VALUE(44.930), + SIMDE_FLOAT16_VALUE(13.880), + SIMDE_FLOAT16_VALUE(27.680), + SIMDE_FLOAT16_VALUE(-6.820) }, + { SIMDE_FLOAT16_VALUE(9.1), + SIMDE_FLOAT16_VALUE(-10.5), + { SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(13.5) }, + SIMDE_FLOAT16_VALUE(152.950), + SIMDE_FLOAT16_VALUE(-128.450), + SIMDE_FLOAT16_VALUE(-87.500), + SIMDE_FLOAT16_VALUE(-132.650) }, + { SIMDE_FLOAT16_VALUE(-4.1), + SIMDE_FLOAT16_VALUE(-2.4), + { SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-14.6), SIMDE_FLOAT16_VALUE(0.0) }, + SIMDE_FLOAT16_VALUE(1.420), + SIMDE_FLOAT16_VALUE(31.900), + SIMDE_FLOAT16_VALUE(30.940), + SIMDE_FLOAT16_VALUE(-4.100) } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t v = simde_vld1_f16(test_vec[i].v); + simde_float16_t r0 = simde_vfmah_lane_f16(test_vec[i].a, test_vec[i].b, v, 0); + simde_float16_t r1 = simde_vfmah_lane_f16(test_vec[i].a, test_vec[i].b, v, 1); + simde_float16_t r2 = simde_vfmah_lane_f16(test_vec[i].a, test_vec[i].b, v, 2); + simde_float16_t r3 = simde_vfmah_lane_f16(test_vec[i].a, test_vec[i].b, v, 3); + simde_assert_equal_f16(r0, test_vec[i].r0, 1); + simde_assert_equal_f16(r1, test_vec[i].r1, 1); + simde_assert_equal_f16(r2, test_vec[i].r2, 1); + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + } + + return 0; +} + +static int +test_simde_vfmah_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t b; + simde_float16_t v[8]; + simde_float16_t r0; + simde_float16_t r1; + simde_float16_t r2; + simde_float16_t r3; + simde_float16_t r4; + simde_float16_t r5; + 
simde_float16_t r6; + simde_float16_t r7; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(9.6), + SIMDE_FLOAT16_VALUE(-2.7), + { SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(-0.9), + SIMDE_FLOAT16_VALUE(-3.7), SIMDE_FLOAT16_VALUE(0.9), SIMDE_FLOAT16_VALUE(-9.8), SIMDE_FLOAT16_VALUE(-0.1) }, + SIMDE_FLOAT16_VALUE(35.520), + SIMDE_FLOAT16_VALUE(5.280), + SIMDE_FLOAT16_VALUE(2.580), + SIMDE_FLOAT16_VALUE(12.030), + SIMDE_FLOAT16_VALUE(19.590), + SIMDE_FLOAT16_VALUE(7.170), + SIMDE_FLOAT16_VALUE(36.060), + SIMDE_FLOAT16_VALUE(9.870) }, + { SIMDE_FLOAT16_VALUE(8.3), + SIMDE_FLOAT16_VALUE(6.6), + { SIMDE_FLOAT16_VALUE(3.8), SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(4.7), SIMDE_FLOAT16_VALUE(-1.9), + SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(-2.8), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(-5.0) }, + SIMDE_FLOAT16_VALUE(33.380), + SIMDE_FLOAT16_VALUE(-3.580), + SIMDE_FLOAT16_VALUE(39.320), + SIMDE_FLOAT16_VALUE(-4.240), + SIMDE_FLOAT16_VALUE(-4.240), + SIMDE_FLOAT16_VALUE(-10.180), + SIMDE_FLOAT16_VALUE(5.000), + SIMDE_FLOAT16_VALUE(-24.700) }, + { SIMDE_FLOAT16_VALUE(8.7), + SIMDE_FLOAT16_VALUE(2.3), + { SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(-9.5), + SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(-6.3), SIMDE_FLOAT16_VALUE(3.4) }, + SIMDE_FLOAT16_VALUE(-12.230), + SIMDE_FLOAT16_VALUE(10.080), + SIMDE_FLOAT16_VALUE(14.220), + SIMDE_FLOAT16_VALUE(-13.150), + SIMDE_FLOAT16_VALUE(9.390), + SIMDE_FLOAT16_VALUE(-5.330), + SIMDE_FLOAT16_VALUE(-5.790), + SIMDE_FLOAT16_VALUE(16.520) }, + { SIMDE_FLOAT16_VALUE(3.3), + SIMDE_FLOAT16_VALUE(-9.7), + { SIMDE_FLOAT16_VALUE(2.8), SIMDE_FLOAT16_VALUE(-8.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(6.3), + SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(7.2) }, + SIMDE_FLOAT16_VALUE(-23.860), + SIMDE_FLOAT16_VALUE(85.750), + 
SIMDE_FLOAT16_VALUE(56.650), + SIMDE_FLOAT16_VALUE(-57.810), + SIMDE_FLOAT16_VALUE(91.570), + SIMDE_FLOAT16_VALUE(54.710), + SIMDE_FLOAT16_VALUE(36.280), + SIMDE_FLOAT16_VALUE(-66.540) }, + { SIMDE_FLOAT16_VALUE(9.1), + SIMDE_FLOAT16_VALUE(-6.2), + { SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(-7.9), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(-8.0), + SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-0.7) }, + SIMDE_FLOAT16_VALUE(9.100), + SIMDE_FLOAT16_VALUE(58.080), + SIMDE_FLOAT16_VALUE(-33.060), + SIMDE_FLOAT16_VALUE(58.700), + SIMDE_FLOAT16_VALUE(-41.740), + SIMDE_FLOAT16_VALUE(0.420), + SIMDE_FLOAT16_VALUE(21.500), + SIMDE_FLOAT16_VALUE(13.440) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t v = simde_vld1q_f16(test_vec[i].v); + simde_float16_t r0 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 0); + simde_float16_t r1 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 1); + simde_float16_t r2 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 2); + simde_float16_t r3 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 3); + simde_float16_t r4 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 4); + simde_float16_t r5 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 5); + simde_float16_t r6 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 6); + simde_float16_t r7 = simde_vfmah_laneq_f16(test_vec[i].a, test_vec[i].b, v, 7); + + simde_assert_equal_f16(r0, test_vec[i].r0, 1); + simde_assert_equal_f16(r1, test_vec[i].r1, 1); + simde_assert_equal_f16(r2, test_vec[i].r2, 1); + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + simde_assert_equal_f16(r4, test_vec[i].r4, 1); + simde_assert_equal_f16(r5, test_vec[i].r5, 1); + simde_assert_equal_f16(r6, test_vec[i].r6, 1); + simde_assert_equal_f16(r7, test_vec[i].r7, 1); + } + + return 0; +} + static int test_simde_vfmas_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 
@@ -336,6 +500,71 @@ test_simde_vfmas_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vfma_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[4]; + simde_float16_t b[4]; + simde_float16_t v[4]; + simde_float16_t r0[4]; + simde_float16_t r1[4]; + simde_float16_t r2[4]; + simde_float16_t r3[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-8.1), SIMDE_FLOAT16_VALUE(-5.0), SIMDE_FLOAT16_VALUE(-5.6), SIMDE_FLOAT16_VALUE(-4.3) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(-3.0), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(-5.4) }, + { SIMDE_FLOAT16_VALUE(-9.8), SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(-7.1), SIMDE_FLOAT16_VALUE(1.1) }, + { SIMDE_FLOAT16_VALUE(-31.620), SIMDE_FLOAT16_VALUE(24.400), SIMDE_FLOAT16_VALUE(-86.940), SIMDE_FLOAT16_VALUE(48.620) }, + { SIMDE_FLOAT16_VALUE(-31.140), SIMDE_FLOAT16_VALUE(23.800), SIMDE_FLOAT16_VALUE(-85.280), SIMDE_FLOAT16_VALUE(47.540) }, + { SIMDE_FLOAT16_VALUE(-25.140), SIMDE_FLOAT16_VALUE(16.300), SIMDE_FLOAT16_VALUE(-64.530), SIMDE_FLOAT16_VALUE(34.040) }, + { SIMDE_FLOAT16_VALUE(-5.460), SIMDE_FLOAT16_VALUE(-8.300), SIMDE_FLOAT16_VALUE(3.530), SIMDE_FLOAT16_VALUE(-10.240) } }, + { { SIMDE_FLOAT16_VALUE(-5.1), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-1.9) }, + { SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-1.7), SIMDE_FLOAT16_VALUE(0.8) }, + { SIMDE_FLOAT16_VALUE(-8.8), SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(2.2) }, + { SIMDE_FLOAT16_VALUE(62.660), SIMDE_FLOAT16_VALUE(-11.020), SIMDE_FLOAT16_VALUE(13.460), SIMDE_FLOAT16_VALUE(-8.940) }, + { SIMDE_FLOAT16_VALUE(54.190), SIMDE_FLOAT16_VALUE(-9.480), SIMDE_FLOAT16_VALUE(11.590), SIMDE_FLOAT16_VALUE(-8.060) }, + { SIMDE_FLOAT16_VALUE(-82.100), SIMDE_FLOAT16_VALUE(15.300), SIMDE_FLOAT16_VALUE(-18.500), SIMDE_FLOAT16_VALUE(6.100) }, + { SIMDE_FLOAT16_VALUE(-22.040), SIMDE_FLOAT16_VALUE(4.380), SIMDE_FLOAT16_VALUE(-5.240), 
SIMDE_FLOAT16_VALUE(-0.140) } }, + { { SIMDE_FLOAT16_VALUE(-1.2), SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(5.7) }, + { SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(-9.3) }, + { SIMDE_FLOAT16_VALUE(7.3), SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(8.4), SIMDE_FLOAT16_VALUE(0.6) }, + { SIMDE_FLOAT16_VALUE(15.590), SIMDE_FLOAT16_VALUE(60.000), SIMDE_FLOAT16_VALUE(29.970), SIMDE_FLOAT16_VALUE(-62.190) }, + { SIMDE_FLOAT16_VALUE(16.740), SIMDE_FLOAT16_VALUE(64.000), SIMDE_FLOAT16_VALUE(32.420), SIMDE_FLOAT16_VALUE(-66.840) }, + { SIMDE_FLOAT16_VALUE(18.120), SIMDE_FLOAT16_VALUE(68.800), SIMDE_FLOAT16_VALUE(35.360), SIMDE_FLOAT16_VALUE(-72.420) }, + { SIMDE_FLOAT16_VALUE(0.180), SIMDE_FLOAT16_VALUE(6.400), SIMDE_FLOAT16_VALUE(-2.860), SIMDE_FLOAT16_VALUE(0.120) } }, + { { SIMDE_FLOAT16_VALUE(8.9), SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(-4.1), SIMDE_FLOAT16_VALUE(8.3) }, + { SIMDE_FLOAT16_VALUE(-9.5), SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(-9.2), SIMDE_FLOAT16_VALUE(5.3) }, + { SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(-9.6) }, + { SIMDE_FLOAT16_VALUE(54.500), SIMDE_FLOAT16_VALUE(23.200), SIMDE_FLOAT16_VALUE(40.060), SIMDE_FLOAT16_VALUE(-17.140) }, + { SIMDE_FLOAT16_VALUE(-55.700), SIMDE_FLOAT16_VALUE(-29.000), SIMDE_FLOAT16_VALUE(-66.660), SIMDE_FLOAT16_VALUE(44.340) }, + { SIMDE_FLOAT16_VALUE(3.200), SIMDE_FLOAT16_VALUE(-1.100), SIMDE_FLOAT16_VALUE(-9.620), SIMDE_FLOAT16_VALUE(11.480) }, + { SIMDE_FLOAT16_VALUE(100.100), SIMDE_FLOAT16_VALUE(44.800), SIMDE_FLOAT16_VALUE(84.220), SIMDE_FLOAT16_VALUE(-42.580) } }, + { { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(5.2), SIMDE_FLOAT16_VALUE(5.7) }, + { SIMDE_FLOAT16_VALUE(-7.6), SIMDE_FLOAT16_VALUE(-6.4), SIMDE_FLOAT16_VALUE(-6.3), SIMDE_FLOAT16_VALUE(-3.1) }, + { SIMDE_FLOAT16_VALUE(-7.0), SIMDE_FLOAT16_VALUE(-9.0), 
SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(1.1) }, + { SIMDE_FLOAT16_VALUE(54.300), SIMDE_FLOAT16_VALUE(37.900), SIMDE_FLOAT16_VALUE(49.300), SIMDE_FLOAT16_VALUE(27.400) }, + { SIMDE_FLOAT16_VALUE(69.500), SIMDE_FLOAT16_VALUE(50.700), SIMDE_FLOAT16_VALUE(61.900), SIMDE_FLOAT16_VALUE(33.600) }, + { SIMDE_FLOAT16_VALUE(-39.180), SIMDE_FLOAT16_VALUE(-40.820), SIMDE_FLOAT16_VALUE(-28.190), SIMDE_FLOAT16_VALUE(-10.730) }, + { SIMDE_FLOAT16_VALUE(-7.260), SIMDE_FLOAT16_VALUE(-13.940), SIMDE_FLOAT16_VALUE(-1.730), SIMDE_FLOAT16_VALUE(2.290) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t v = simde_vld1_f16(test_vec[i].v); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r0 = simde_vfma_lane_f16(a, b, v, 0); + simde_float16x4_t r1 = simde_vfma_lane_f16(a, b, v, 1); + simde_float16x4_t r2 = simde_vfma_lane_f16(a, b, v, 2); + simde_float16x4_t r3 = simde_vfma_lane_f16(a, b, v, 3); + simde_test_arm_neon_assert_equal_f16x4(r0, simde_vld1_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x4(r1, simde_vld1_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x4(r2, simde_vld1_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x4(r3, simde_vld1_f16(test_vec[i].r3), 1); + } + + return 0; +} + static int test_simde_vfma_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -485,6 +714,108 @@ test_simde_vfma_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vfma_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[4]; + simde_float16_t b[4]; + simde_float16_t v[8]; + simde_float16_t r0[4]; + simde_float16_t r1[4]; + simde_float16_t r2[4]; + simde_float16_t r3[4]; + simde_float16_t r4[4]; + simde_float16_t r5[4]; + simde_float16_t r6[4]; + simde_float16_t r7[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(-5.6), 
SIMDE_FLOAT16_VALUE(-0.4) }, + { SIMDE_FLOAT16_VALUE(-9.8), SIMDE_FLOAT16_VALUE(6.1), SIMDE_FLOAT16_VALUE(-5.7), SIMDE_FLOAT16_VALUE(7.3) }, + { SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(-6.6), SIMDE_FLOAT16_VALUE(-5.5), + SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(2.8) }, + { SIMDE_FLOAT16_VALUE(21.080), SIMDE_FLOAT16_VALUE(-12.510), SIMDE_FLOAT16_VALUE(6.370), SIMDE_FLOAT16_VALUE(-15.730) }, + { SIMDE_FLOAT16_VALUE(7.360), SIMDE_FLOAT16_VALUE(-3.970), SIMDE_FLOAT16_VALUE(-1.610), SIMDE_FLOAT16_VALUE(-5.510) }, + { SIMDE_FLOAT16_VALUE(65.180), SIMDE_FLOAT16_VALUE(-39.960), SIMDE_FLOAT16_VALUE(32.020), SIMDE_FLOAT16_VALUE(-48.580) }, + { SIMDE_FLOAT16_VALUE(54.400), SIMDE_FLOAT16_VALUE(-33.250), SIMDE_FLOAT16_VALUE(25.750), SIMDE_FLOAT16_VALUE(-40.550) }, + { SIMDE_FLOAT16_VALUE(21.080), SIMDE_FLOAT16_VALUE(-12.510), SIMDE_FLOAT16_VALUE(6.370), SIMDE_FLOAT16_VALUE(-15.730) }, + { SIMDE_FLOAT16_VALUE(18.140), SIMDE_FLOAT16_VALUE(-10.680), SIMDE_FLOAT16_VALUE(4.660), SIMDE_FLOAT16_VALUE(-13.540) }, + { SIMDE_FLOAT16_VALUE(30.880), SIMDE_FLOAT16_VALUE(-18.610), SIMDE_FLOAT16_VALUE(12.070), SIMDE_FLOAT16_VALUE(-23.030) }, + { SIMDE_FLOAT16_VALUE(-26.940), SIMDE_FLOAT16_VALUE(17.380), SIMDE_FLOAT16_VALUE(-21.560), SIMDE_FLOAT16_VALUE(20.040) } }, + { { SIMDE_FLOAT16_VALUE(-1.7), SIMDE_FLOAT16_VALUE(7.9), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(-0.1) }, + { SIMDE_FLOAT16_VALUE(-7.2), SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(-2.8) }, + { SIMDE_FLOAT16_VALUE(9.5), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(-9.3), SIMDE_FLOAT16_VALUE(-2.5), + SIMDE_FLOAT16_VALUE(-9.9), SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-6.0), SIMDE_FLOAT16_VALUE(6.4) }, + { SIMDE_FLOAT16_VALUE(-70.100), SIMDE_FLOAT16_VALUE(60.150), SIMDE_FLOAT16_VALUE(-88.850), SIMDE_FLOAT16_VALUE(-26.700) }, + { SIMDE_FLOAT16_VALUE(-68.660), SIMDE_FLOAT16_VALUE(59.050), 
SIMDE_FLOAT16_VALUE(-87.030), SIMDE_FLOAT16_VALUE(-26.140) }, + { SIMDE_FLOAT16_VALUE(65.260), SIMDE_FLOAT16_VALUE(-43.250), SIMDE_FLOAT16_VALUE(82.230), SIMDE_FLOAT16_VALUE(25.940) }, + { SIMDE_FLOAT16_VALUE(16.300), SIMDE_FLOAT16_VALUE(-5.850), SIMDE_FLOAT16_VALUE(20.350), SIMDE_FLOAT16_VALUE(6.900) }, + { SIMDE_FLOAT16_VALUE(69.580), SIMDE_FLOAT16_VALUE(-46.550), SIMDE_FLOAT16_VALUE(87.690), SIMDE_FLOAT16_VALUE(27.620) }, + { SIMDE_FLOAT16_VALUE(-22.580), SIMDE_FLOAT16_VALUE(23.850), SIMDE_FLOAT16_VALUE(-28.790), SIMDE_FLOAT16_VALUE(-8.220) }, + { SIMDE_FLOAT16_VALUE(41.500), SIMDE_FLOAT16_VALUE(-25.100), SIMDE_FLOAT16_VALUE(52.200), SIMDE_FLOAT16_VALUE(16.700) }, + { SIMDE_FLOAT16_VALUE(-47.780), SIMDE_FLOAT16_VALUE(43.100), SIMDE_FLOAT16_VALUE(-60.640), SIMDE_FLOAT16_VALUE(-18.020) } }, + { { SIMDE_FLOAT16_VALUE(-9.5), SIMDE_FLOAT16_VALUE(-7.9), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(6.3) }, + { SIMDE_FLOAT16_VALUE(-5.6), SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(-1.0) }, + { SIMDE_FLOAT16_VALUE(0.1), SIMDE_FLOAT16_VALUE(1.8), SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(7.4), + SIMDE_FLOAT16_VALUE(-4.2), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-6.2), SIMDE_FLOAT16_VALUE(5.9) }, + { SIMDE_FLOAT16_VALUE(-10.060), SIMDE_FLOAT16_VALUE(-7.670), SIMDE_FLOAT16_VALUE(2.340), SIMDE_FLOAT16_VALUE(6.200) }, + { SIMDE_FLOAT16_VALUE(-19.580), SIMDE_FLOAT16_VALUE(-3.760), SIMDE_FLOAT16_VALUE(-12.280), SIMDE_FLOAT16_VALUE(4.500) }, + { SIMDE_FLOAT16_VALUE(2.260), SIMDE_FLOAT16_VALUE(-12.730), SIMDE_FLOAT16_VALUE(21.260), SIMDE_FLOAT16_VALUE(8.400) }, + { SIMDE_FLOAT16_VALUE(-50.940), SIMDE_FLOAT16_VALUE(9.120), SIMDE_FLOAT16_VALUE(-60.440), SIMDE_FLOAT16_VALUE(-1.100) }, + { SIMDE_FLOAT16_VALUE(14.020), SIMDE_FLOAT16_VALUE(-17.560), SIMDE_FLOAT16_VALUE(39.320), SIMDE_FLOAT16_VALUE(10.500) }, + { SIMDE_FLOAT16_VALUE(-11.740), SIMDE_FLOAT16_VALUE(-6.980), SIMDE_FLOAT16_VALUE(-0.240), SIMDE_FLOAT16_VALUE(5.900) }, + { 
SIMDE_FLOAT16_VALUE(25.220), SIMDE_FLOAT16_VALUE(-22.160), SIMDE_FLOAT16_VALUE(56.520), SIMDE_FLOAT16_VALUE(12.500) }, + { SIMDE_FLOAT16_VALUE(-42.540), SIMDE_FLOAT16_VALUE(5.670), SIMDE_FLOAT16_VALUE(-47.540), SIMDE_FLOAT16_VALUE(0.400) } }, + { { SIMDE_FLOAT16_VALUE(-7.0), SIMDE_FLOAT16_VALUE(5.9), SIMDE_FLOAT16_VALUE(-10.0), SIMDE_FLOAT16_VALUE(-8.9) }, + { SIMDE_FLOAT16_VALUE(-2.8), SIMDE_FLOAT16_VALUE(6.7), SIMDE_FLOAT16_VALUE(-4.1), SIMDE_FLOAT16_VALUE(-7.0) }, + { SIMDE_FLOAT16_VALUE(7.3), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(4.4), + SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(6.0), SIMDE_FLOAT16_VALUE(4.0), SIMDE_FLOAT16_VALUE(-3.8) }, + { SIMDE_FLOAT16_VALUE(-27.440), SIMDE_FLOAT16_VALUE(54.810), SIMDE_FLOAT16_VALUE(-39.930), SIMDE_FLOAT16_VALUE(-60.000) }, + { SIMDE_FLOAT16_VALUE(-10.360), SIMDE_FLOAT16_VALUE(13.940), SIMDE_FLOAT16_VALUE(-14.920), SIMDE_FLOAT16_VALUE(-17.300) }, + { SIMDE_FLOAT16_VALUE(-7.560), SIMDE_FLOAT16_VALUE(7.240), SIMDE_FLOAT16_VALUE(-10.820), SIMDE_FLOAT16_VALUE(-10.300) }, + { SIMDE_FLOAT16_VALUE(-19.320), SIMDE_FLOAT16_VALUE(35.380), SIMDE_FLOAT16_VALUE(-28.040), SIMDE_FLOAT16_VALUE(-39.700) }, + { SIMDE_FLOAT16_VALUE(18.480), SIMDE_FLOAT16_VALUE(-55.070), SIMDE_FLOAT16_VALUE(27.310), SIMDE_FLOAT16_VALUE(54.800) }, + { SIMDE_FLOAT16_VALUE(-23.800), SIMDE_FLOAT16_VALUE(46.100), SIMDE_FLOAT16_VALUE(-34.600), SIMDE_FLOAT16_VALUE(-50.900) }, + { SIMDE_FLOAT16_VALUE(-18.200), SIMDE_FLOAT16_VALUE(32.700), SIMDE_FLOAT16_VALUE(-26.400), SIMDE_FLOAT16_VALUE(-36.900) }, + { SIMDE_FLOAT16_VALUE(3.640), SIMDE_FLOAT16_VALUE(-19.560), SIMDE_FLOAT16_VALUE(5.580), SIMDE_FLOAT16_VALUE(17.700) } }, + { { SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(-3.6) }, + { SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(-4.1) }, + { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(5.1), 
SIMDE_FLOAT16_VALUE(6.4), + SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(8.8), SIMDE_FLOAT16_VALUE(-1.0) }, + { SIMDE_FLOAT16_VALUE(-38.290), SIMDE_FLOAT16_VALUE(38.680), SIMDE_FLOAT16_VALUE(-41.040), SIMDE_FLOAT16_VALUE(-23.690) }, + { SIMDE_FLOAT16_VALUE(87.290), SIMDE_FLOAT16_VALUE(-74.480), SIMDE_FLOAT16_VALUE(77.640), SIMDE_FLOAT16_VALUE(32.890) }, + { SIMDE_FLOAT16_VALUE(-40.110), SIMDE_FLOAT16_VALUE(40.320), SIMDE_FLOAT16_VALUE(-42.760), SIMDE_FLOAT16_VALUE(-24.510) }, + { SIMDE_FLOAT16_VALUE(-51.940), SIMDE_FLOAT16_VALUE(50.980), SIMDE_FLOAT16_VALUE(-53.940), SIMDE_FLOAT16_VALUE(-29.840) }, + { SIMDE_FLOAT16_VALUE(28.140), SIMDE_FLOAT16_VALUE(-21.180), SIMDE_FLOAT16_VALUE(21.740), SIMDE_FLOAT16_VALUE(6.240) }, + { SIMDE_FLOAT16_VALUE(-59.220), SIMDE_FLOAT16_VALUE(57.540), SIMDE_FLOAT16_VALUE(-60.820), SIMDE_FLOAT16_VALUE(-33.120) }, + { SIMDE_FLOAT16_VALUE(-73.780), SIMDE_FLOAT16_VALUE(70.660), SIMDE_FLOAT16_VALUE(-74.580), SIMDE_FLOAT16_VALUE(-39.680) }, + { SIMDE_FLOAT16_VALUE(15.400), SIMDE_FLOAT16_VALUE(-9.700), SIMDE_FLOAT16_VALUE(9.700), SIMDE_FLOAT16_VALUE(0.500) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t v = simde_vld1q_f16(test_vec[i].v); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r0 = simde_vfma_laneq_f16(a, b, v, 0); + simde_float16x4_t r1 = simde_vfma_laneq_f16(a, b, v, 1); + simde_float16x4_t r2 = simde_vfma_laneq_f16(a, b, v, 2); + simde_float16x4_t r3 = simde_vfma_laneq_f16(a, b, v, 3); + simde_float16x4_t r4 = simde_vfma_laneq_f16(a, b, v, 4); + simde_float16x4_t r5 = simde_vfma_laneq_f16(a, b, v, 5); + simde_float16x4_t r6 = simde_vfma_laneq_f16(a, b, v, 6); + simde_float16x4_t r7 = simde_vfma_laneq_f16(a, b, v, 7); + simde_test_arm_neon_assert_equal_f16x4(r0, simde_vld1_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x4(r1, 
simde_vld1_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x4(r2, simde_vld1_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x4(r3, simde_vld1_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x4(r4, simde_vld1_f16(test_vec[i].r4), 1); + simde_test_arm_neon_assert_equal_f16x4(r5, simde_vld1_f16(test_vec[i].r5), 1); + simde_test_arm_neon_assert_equal_f16x4(r6, simde_vld1_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x4(r7, simde_vld1_f16(test_vec[i].r7), 1); + } + + return 0; +} + static int test_simde_vfma_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -675,6 +1006,101 @@ test_simde_vfma_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vfmaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[8]; + simde_float16_t b[8]; + simde_float16_t v[4]; + simde_float16_t r0[8]; + simde_float16_t r1[8]; + simde_float16_t r2[8]; + simde_float16_t r3[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(3.9), SIMDE_FLOAT16_VALUE(7.7), SIMDE_FLOAT16_VALUE(6.9), + SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(-4.5) }, + { SIMDE_FLOAT16_VALUE(8.8), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(7.3), SIMDE_FLOAT16_VALUE(-2.2), + SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(7.2) }, + { SIMDE_FLOAT16_VALUE(4.7), SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(-5.1) }, + { SIMDE_FLOAT16_VALUE(41.160), SIMDE_FLOAT16_VALUE(13.300), SIMDE_FLOAT16_VALUE(42.010), SIMDE_FLOAT16_VALUE(-3.440), + SIMDE_FLOAT16_VALUE(6.160), SIMDE_FLOAT16_VALUE(-9.000), SIMDE_FLOAT16_VALUE(32.700), SIMDE_FLOAT16_VALUE(29.340) }, + { SIMDE_FLOAT16_VALUE(-51.240), SIMDE_FLOAT16_VALUE(-7.700), SIMDE_FLOAT16_VALUE(-34.640), SIMDE_FLOAT16_VALUE(19.660), + SIMDE_FLOAT16_VALUE(-2.240), SIMDE_FLOAT16_VALUE(12.000), SIMDE_FLOAT16_VALUE(-61.800), SIMDE_FLOAT16_VALUE(-46.260) }, 
+ { SIMDE_FLOAT16_VALUE(2.440), SIMDE_FLOAT16_VALUE(4.500), SIMDE_FLOAT16_VALUE(9.890), SIMDE_FLOAT16_VALUE(6.240), + SIMDE_FLOAT16_VALUE(2.640), SIMDE_FLOAT16_VALUE(-0.200), SIMDE_FLOAT16_VALUE(-6.900), SIMDE_FLOAT16_VALUE(-2.340) }, + { SIMDE_FLOAT16_VALUE(-45.080), SIMDE_FLOAT16_VALUE(-6.300), SIMDE_FLOAT16_VALUE(-29.530), SIMDE_FLOAT16_VALUE(18.120), + SIMDE_FLOAT16_VALUE(-1.680), SIMDE_FLOAT16_VALUE(10.600), SIMDE_FLOAT16_VALUE(-55.500), SIMDE_FLOAT16_VALUE(-41.220) } }, + { { SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(6.6), SIMDE_FLOAT16_VALUE(0.7), SIMDE_FLOAT16_VALUE(-6.3), + SIMDE_FLOAT16_VALUE(-4.3), SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(6.0), SIMDE_FLOAT16_VALUE(4.7) }, + { SIMDE_FLOAT16_VALUE(1.8), SIMDE_FLOAT16_VALUE(6.6), SIMDE_FLOAT16_VALUE(-9.5), SIMDE_FLOAT16_VALUE(-3.0), + SIMDE_FLOAT16_VALUE(-5.9), SIMDE_FLOAT16_VALUE(-2.8), SIMDE_FLOAT16_VALUE(7.6), SIMDE_FLOAT16_VALUE(4.5) }, + { SIMDE_FLOAT16_VALUE(2.8), SIMDE_FLOAT16_VALUE(7.9), SIMDE_FLOAT16_VALUE(0.9), SIMDE_FLOAT16_VALUE(4.1) }, + { SIMDE_FLOAT16_VALUE(-4.560), SIMDE_FLOAT16_VALUE(25.080), SIMDE_FLOAT16_VALUE(-25.900), SIMDE_FLOAT16_VALUE(-14.700), + SIMDE_FLOAT16_VALUE(-20.820), SIMDE_FLOAT16_VALUE(-12.340), SIMDE_FLOAT16_VALUE(27.280), SIMDE_FLOAT16_VALUE(17.300) }, + { SIMDE_FLOAT16_VALUE(4.620), SIMDE_FLOAT16_VALUE(58.740), SIMDE_FLOAT16_VALUE(-74.350), SIMDE_FLOAT16_VALUE(-30.000), + SIMDE_FLOAT16_VALUE(-50.910), SIMDE_FLOAT16_VALUE(-26.620), SIMDE_FLOAT16_VALUE(66.040), SIMDE_FLOAT16_VALUE(40.250) }, + { SIMDE_FLOAT16_VALUE(-7.980), SIMDE_FLOAT16_VALUE(12.540), SIMDE_FLOAT16_VALUE(-7.850), SIMDE_FLOAT16_VALUE(-9.000), + SIMDE_FLOAT16_VALUE(-9.610), SIMDE_FLOAT16_VALUE(-7.020), SIMDE_FLOAT16_VALUE(12.840), SIMDE_FLOAT16_VALUE(8.750) }, + { SIMDE_FLOAT16_VALUE(-2.220), SIMDE_FLOAT16_VALUE(33.660), SIMDE_FLOAT16_VALUE(-38.250), SIMDE_FLOAT16_VALUE(-18.600), + SIMDE_FLOAT16_VALUE(-28.490), SIMDE_FLOAT16_VALUE(-15.980), SIMDE_FLOAT16_VALUE(37.160), SIMDE_FLOAT16_VALUE(23.150) 
} }, + { { SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(-2.2), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(5.5), + SIMDE_FLOAT16_VALUE(-6.8), SIMDE_FLOAT16_VALUE(-2.6), SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(-3.2) }, + { SIMDE_FLOAT16_VALUE(-5.9), SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(0.2), + SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(1.5) }, + { SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(2.4) }, + { SIMDE_FLOAT16_VALUE(-23.950), SIMDE_FLOAT16_VALUE(-13.750), SIMDE_FLOAT16_VALUE(3.650), SIMDE_FLOAT16_VALUE(6.200), + SIMDE_FLOAT16_VALUE(-37.950), SIMDE_FLOAT16_VALUE(22.600), SIMDE_FLOAT16_VALUE(30.300), SIMDE_FLOAT16_VALUE(2.050) }, + { SIMDE_FLOAT16_VALUE(-10.970), SIMDE_FLOAT16_VALUE(-6.490), SIMDE_FLOAT16_VALUE(2.550), SIMDE_FLOAT16_VALUE(5.760), + SIMDE_FLOAT16_VALUE(-18.370), SIMDE_FLOAT16_VALUE(6.760), SIMDE_FLOAT16_VALUE(12.260), SIMDE_FLOAT16_VALUE(-1.250) }, + { SIMDE_FLOAT16_VALUE(-8.020), SIMDE_FLOAT16_VALUE(-4.840), SIMDE_FLOAT16_VALUE(2.300), SIMDE_FLOAT16_VALUE(5.660), + SIMDE_FLOAT16_VALUE(-13.920), SIMDE_FLOAT16_VALUE(3.160), SIMDE_FLOAT16_VALUE(8.160), SIMDE_FLOAT16_VALUE(-2.000) }, + { SIMDE_FLOAT16_VALUE(-17.460), SIMDE_FLOAT16_VALUE(-10.120), SIMDE_FLOAT16_VALUE(3.100), SIMDE_FLOAT16_VALUE(5.980), + SIMDE_FLOAT16_VALUE(-28.160), SIMDE_FLOAT16_VALUE(14.680), SIMDE_FLOAT16_VALUE(21.280), SIMDE_FLOAT16_VALUE(0.400) } }, + { { SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(0.1), SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(3.5), + SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(-7.3), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(7.5) }, + { SIMDE_FLOAT16_VALUE(-2.6), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(-7.0), + SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(-9.0), SIMDE_FLOAT16_VALUE(-2.0) }, + { SIMDE_FLOAT16_VALUE(-5.1), 
SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(3.8), SIMDE_FLOAT16_VALUE(4.3) }, + { SIMDE_FLOAT16_VALUE(15.860), SIMDE_FLOAT16_VALUE(-33.050), SIMDE_FLOAT16_VALUE(-32.380), SIMDE_FLOAT16_VALUE(39.200), + SIMDE_FLOAT16_VALUE(-24.400), SIMDE_FLOAT16_VALUE(-54.730), SIMDE_FLOAT16_VALUE(42.500), SIMDE_FLOAT16_VALUE(17.700) }, + { SIMDE_FLOAT16_VALUE(-11.440), SIMDE_FLOAT16_VALUE(35.200), SIMDE_FLOAT16_VALUE(39.020), SIMDE_FLOAT16_VALUE(-34.300), + SIMDE_FLOAT16_VALUE(28.100), SIMDE_FLOAT16_VALUE(42.920), SIMDE_FLOAT16_VALUE(-52.000), SIMDE_FLOAT16_VALUE(-3.300) }, + { SIMDE_FLOAT16_VALUE(-7.280), SIMDE_FLOAT16_VALUE(24.800), SIMDE_FLOAT16_VALUE(28.140), SIMDE_FLOAT16_VALUE(-23.100), + SIMDE_FLOAT16_VALUE(20.100), SIMDE_FLOAT16_VALUE(28.040), SIMDE_FLOAT16_VALUE(-37.600), SIMDE_FLOAT16_VALUE(-0.100) }, + { SIMDE_FLOAT16_VALUE(-8.580), SIMDE_FLOAT16_VALUE(28.050), SIMDE_FLOAT16_VALUE(31.540), SIMDE_FLOAT16_VALUE(-26.600), + SIMDE_FLOAT16_VALUE(22.600), SIMDE_FLOAT16_VALUE(32.690), SIMDE_FLOAT16_VALUE(-42.100), SIMDE_FLOAT16_VALUE(-1.100) } }, + { { SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(4.7), SIMDE_FLOAT16_VALUE(-7.4), + SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(1.2) }, + { SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(-9.7), SIMDE_FLOAT16_VALUE(-5.9), SIMDE_FLOAT16_VALUE(-8.1), + SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(6.8) }, + { SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(3.9), SIMDE_FLOAT16_VALUE(5.6), SIMDE_FLOAT16_VALUE(7.1) }, + { SIMDE_FLOAT16_VALUE(-2.900), SIMDE_FLOAT16_VALUE(-5.250), SIMDE_FLOAT16_VALUE(-4.150), SIMDE_FLOAT16_VALUE(-19.550), + SIMDE_FLOAT16_VALUE(4.400), SIMDE_FLOAT16_VALUE(-2.650), SIMDE_FLOAT16_VALUE(10.300), SIMDE_FLOAT16_VALUE(11.400) }, + { SIMDE_FLOAT16_VALUE(0.940), SIMDE_FLOAT16_VALUE(-28.530), SIMDE_FLOAT16_VALUE(-18.310), SIMDE_FLOAT16_VALUE(-38.990), + SIMDE_FLOAT16_VALUE(4.880), 
SIMDE_FLOAT16_VALUE(-10.570), SIMDE_FLOAT16_VALUE(17.980), SIMDE_FLOAT16_VALUE(27.720) }, + { SIMDE_FLOAT16_VALUE(3.660), SIMDE_FLOAT16_VALUE(-45.020), SIMDE_FLOAT16_VALUE(-28.340), SIMDE_FLOAT16_VALUE(-52.760), + SIMDE_FLOAT16_VALUE(5.220), SIMDE_FLOAT16_VALUE(-16.180), SIMDE_FLOAT16_VALUE(23.420), SIMDE_FLOAT16_VALUE(39.280) }, + { SIMDE_FLOAT16_VALUE(6.060), SIMDE_FLOAT16_VALUE(-59.570), SIMDE_FLOAT16_VALUE(-37.190), SIMDE_FLOAT16_VALUE(-64.910), + SIMDE_FLOAT16_VALUE(5.520), SIMDE_FLOAT16_VALUE(-21.130), SIMDE_FLOAT16_VALUE(28.220), SIMDE_FLOAT16_VALUE(49.480) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t v = simde_vld1_f16(test_vec[i].v); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r0 = simde_vfmaq_lane_f16(a, b, v, 0); + simde_float16x8_t r1 = simde_vfmaq_lane_f16(a, b, v, 1); + simde_float16x8_t r2 = simde_vfmaq_lane_f16(a, b, v, 2); + simde_float16x8_t r3 = simde_vfmaq_lane_f16(a, b, v, 3); + simde_test_arm_neon_assert_equal_f16x8(r0, simde_vld1q_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x8(r1, simde_vld1q_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x8(r2, simde_vld1q_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x8(r3, simde_vld1q_f16(test_vec[i].r3), 1); + } + + return 0; +} + static int test_simde_vfmaq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -826,6 +1252,158 @@ test_simde_vfmaq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vfmaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[8]; + simde_float16_t b[8]; + simde_float16_t v[8]; + simde_float16_t r0[8]; + simde_float16_t r1[8]; + simde_float16_t r2[8]; + simde_float16_t r3[8]; + simde_float16_t r4[8]; + simde_float16_t r5[8]; + simde_float16_t r6[8]; + simde_float16_t r7[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(2.7), 
SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(-3.9), + SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(6.1), SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(-5.2) }, + { SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(-5.4), + SIMDE_FLOAT16_VALUE(-2.5), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(-9.9), SIMDE_FLOAT16_VALUE(-7.6) }, + { SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.9), + SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(7.2) }, + { SIMDE_FLOAT16_VALUE(-51.500), SIMDE_FLOAT16_VALUE(-16.550), SIMDE_FLOAT16_VALUE(-14.350), SIMDE_FLOAT16_VALUE(25.800), + SIMDE_FLOAT16_VALUE(21.950), SIMDE_FLOAT16_VALUE(-23.600), SIMDE_FLOAT16_VALUE(57.950), SIMDE_FLOAT16_VALUE(36.600) }, + { SIMDE_FLOAT16_VALUE(-15.500), SIMDE_FLOAT16_VALUE(-2.550), SIMDE_FLOAT16_VALUE(-0.350), SIMDE_FLOAT16_VALUE(4.200), + SIMDE_FLOAT16_VALUE(11.950), SIMDE_FLOAT16_VALUE(-2.000), SIMDE_FLOAT16_VALUE(18.350), SIMDE_FLOAT16_VALUE(6.200) }, + { SIMDE_FLOAT16_VALUE(1.600), SIMDE_FLOAT16_VALUE(4.100), SIMDE_FLOAT16_VALUE(6.300), SIMDE_FLOAT16_VALUE(-6.060), + SIMDE_FLOAT16_VALUE(7.200), SIMDE_FLOAT16_VALUE(8.260), SIMDE_FLOAT16_VALUE(-0.460), SIMDE_FLOAT16_VALUE(-8.240) }, + { SIMDE_FLOAT16_VALUE(6.100), SIMDE_FLOAT16_VALUE(5.850), SIMDE_FLOAT16_VALUE(8.050), SIMDE_FLOAT16_VALUE(-8.760), + SIMDE_FLOAT16_VALUE(5.950), SIMDE_FLOAT16_VALUE(10.960), SIMDE_FLOAT16_VALUE(-5.410), SIMDE_FLOAT16_VALUE(-12.040) }, + { SIMDE_FLOAT16_VALUE(-20.000), SIMDE_FLOAT16_VALUE(-4.300), SIMDE_FLOAT16_VALUE(-2.100), SIMDE_FLOAT16_VALUE(6.900), + SIMDE_FLOAT16_VALUE(13.200), SIMDE_FLOAT16_VALUE(-4.700), SIMDE_FLOAT16_VALUE(23.300), SIMDE_FLOAT16_VALUE(10.000) }, + { SIMDE_FLOAT16_VALUE(29.500), SIMDE_FLOAT16_VALUE(14.950), SIMDE_FLOAT16_VALUE(17.150), SIMDE_FLOAT16_VALUE(-22.800), + SIMDE_FLOAT16_VALUE(-0.550), SIMDE_FLOAT16_VALUE(25.000), 
SIMDE_FLOAT16_VALUE(-31.150), SIMDE_FLOAT16_VALUE(-31.800) }, + { SIMDE_FLOAT16_VALUE(-18.200), SIMDE_FLOAT16_VALUE(-3.600), SIMDE_FLOAT16_VALUE(-1.400), SIMDE_FLOAT16_VALUE(5.820), + SIMDE_FLOAT16_VALUE(12.700), SIMDE_FLOAT16_VALUE(-3.620), SIMDE_FLOAT16_VALUE(21.320), SIMDE_FLOAT16_VALUE(8.480) }, + { SIMDE_FLOAT16_VALUE(62.800), SIMDE_FLOAT16_VALUE(27.900), SIMDE_FLOAT16_VALUE(30.100), SIMDE_FLOAT16_VALUE(-42.780), + SIMDE_FLOAT16_VALUE(-9.800), SIMDE_FLOAT16_VALUE(44.980), SIMDE_FLOAT16_VALUE(-67.780), SIMDE_FLOAT16_VALUE(-59.920) } }, + { { SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-3.8), SIMDE_FLOAT16_VALUE(-0.1), SIMDE_FLOAT16_VALUE(-2.1), + SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(-4.7) }, + { SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(-9.4), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(7.1), + SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(-4.0), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(2.2) }, + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(4.7), + SIMDE_FLOAT16_VALUE(-4.4), SIMDE_FLOAT16_VALUE(-9.4), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(-4.7) }, + { SIMDE_FLOAT16_VALUE(4.080), SIMDE_FLOAT16_VALUE(-6.620), SIMDE_FLOAT16_VALUE(1.400), SIMDE_FLOAT16_VALUE(0.030), + SIMDE_FLOAT16_VALUE(0.320), SIMDE_FLOAT16_VALUE(8.000), SIMDE_FLOAT16_VALUE(5.860), SIMDE_FLOAT16_VALUE(-4.040) }, + { SIMDE_FLOAT16_VALUE(27.480), SIMDE_FLOAT16_VALUE(-91.220), SIMDE_FLOAT16_VALUE(46.400), SIMDE_FLOAT16_VALUE(63.930), + SIMDE_FLOAT16_VALUE(66.920), SIMDE_FLOAT16_VALUE(-28.000), SIMDE_FLOAT16_VALUE(16.660), SIMDE_FLOAT16_VALUE(15.760) }, + { SIMDE_FLOAT16_VALUE(22.540), SIMDE_FLOAT16_VALUE(-73.360), SIMDE_FLOAT16_VALUE(36.900), SIMDE_FLOAT16_VALUE(50.440), + SIMDE_FLOAT16_VALUE(52.860), SIMDE_FLOAT16_VALUE(-20.400), SIMDE_FLOAT16_VALUE(14.380), SIMDE_FLOAT16_VALUE(11.580) }, + { SIMDE_FLOAT16_VALUE(15.520), SIMDE_FLOAT16_VALUE(-47.980), 
SIMDE_FLOAT16_VALUE(23.400), SIMDE_FLOAT16_VALUE(31.270), + SIMDE_FLOAT16_VALUE(32.880), SIMDE_FLOAT16_VALUE(-9.600), SIMDE_FLOAT16_VALUE(11.140), SIMDE_FLOAT16_VALUE(5.640) }, + { SIMDE_FLOAT16_VALUE(-8.140), SIMDE_FLOAT16_VALUE(37.560), SIMDE_FLOAT16_VALUE(-22.100), SIMDE_FLOAT16_VALUE(-33.340), + SIMDE_FLOAT16_VALUE(-34.460), SIMDE_FLOAT16_VALUE(26.800), SIMDE_FLOAT16_VALUE(0.220), SIMDE_FLOAT16_VALUE(-14.380) }, + { SIMDE_FLOAT16_VALUE(-21.140), SIMDE_FLOAT16_VALUE(84.560), SIMDE_FLOAT16_VALUE(-47.100), SIMDE_FLOAT16_VALUE(-68.840), + SIMDE_FLOAT16_VALUE(-71.460), SIMDE_FLOAT16_VALUE(46.800), SIMDE_FLOAT16_VALUE(-5.780), SIMDE_FLOAT16_VALUE(-25.380) }, + { SIMDE_FLOAT16_VALUE(19.940), SIMDE_FLOAT16_VALUE(-63.960), SIMDE_FLOAT16_VALUE(31.900), SIMDE_FLOAT16_VALUE(43.340), + SIMDE_FLOAT16_VALUE(45.460), SIMDE_FLOAT16_VALUE(-16.400), SIMDE_FLOAT16_VALUE(13.180), SIMDE_FLOAT16_VALUE(9.380) }, + { SIMDE_FLOAT16_VALUE(-8.920), SIMDE_FLOAT16_VALUE(40.380), SIMDE_FLOAT16_VALUE(-23.600), SIMDE_FLOAT16_VALUE(-35.470), + SIMDE_FLOAT16_VALUE(-36.680), SIMDE_FLOAT16_VALUE(28.000), SIMDE_FLOAT16_VALUE(-0.140), SIMDE_FLOAT16_VALUE(-15.040) } }, + { { SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(8.9), SIMDE_FLOAT16_VALUE(8.8), SIMDE_FLOAT16_VALUE(-3.6), + SIMDE_FLOAT16_VALUE(-0.3), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-4.0), SIMDE_FLOAT16_VALUE(-3.6) }, + { SIMDE_FLOAT16_VALUE(-5.7), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(-6.3), SIMDE_FLOAT16_VALUE(-5.6), + SIMDE_FLOAT16_VALUE(-6.0), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-5.9) }, + { SIMDE_FLOAT16_VALUE(-3.9), SIMDE_FLOAT16_VALUE(3.9), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(2.4), + SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(-9.1) }, + { SIMDE_FLOAT16_VALUE(25.130), SIMDE_FLOAT16_VALUE(-23.470), SIMDE_FLOAT16_VALUE(33.370), SIMDE_FLOAT16_VALUE(18.240), + SIMDE_FLOAT16_VALUE(23.100), SIMDE_FLOAT16_VALUE(-26.960), 
SIMDE_FLOAT16_VALUE(-11.410), SIMDE_FLOAT16_VALUE(19.410) }, + { SIMDE_FLOAT16_VALUE(-19.330), SIMDE_FLOAT16_VALUE(41.270), SIMDE_FLOAT16_VALUE(-15.770), SIMDE_FLOAT16_VALUE(-25.440), + SIMDE_FLOAT16_VALUE(-23.700), SIMDE_FLOAT16_VALUE(30.760), SIMDE_FLOAT16_VALUE(3.410), SIMDE_FLOAT16_VALUE(-26.610) }, + { SIMDE_FLOAT16_VALUE(-46.690), SIMDE_FLOAT16_VALUE(81.110), SIMDE_FLOAT16_VALUE(-46.010), SIMDE_FLOAT16_VALUE(-52.320), + SIMDE_FLOAT16_VALUE(-52.500), SIMDE_FLOAT16_VALUE(66.280), SIMDE_FLOAT16_VALUE(12.530), SIMDE_FLOAT16_VALUE(-54.930) }, + { SIMDE_FLOAT16_VALUE(-10.780), SIMDE_FLOAT16_VALUE(28.820), SIMDE_FLOAT16_VALUE(-6.320), SIMDE_FLOAT16_VALUE(-17.040), + SIMDE_FLOAT16_VALUE(-14.700), SIMDE_FLOAT16_VALUE(19.660), SIMDE_FLOAT16_VALUE(0.560), SIMDE_FLOAT16_VALUE(-17.760) }, + { SIMDE_FLOAT16_VALUE(21.140), SIMDE_FLOAT16_VALUE(-17.660), SIMDE_FLOAT16_VALUE(28.960), SIMDE_FLOAT16_VALUE(14.320), + SIMDE_FLOAT16_VALUE(18.900), SIMDE_FLOAT16_VALUE(-21.780), SIMDE_FLOAT16_VALUE(-10.080), SIMDE_FLOAT16_VALUE(15.280) }, + { SIMDE_FLOAT16_VALUE(-34.150), SIMDE_FLOAT16_VALUE(62.850), SIMDE_FLOAT16_VALUE(-32.150), SIMDE_FLOAT16_VALUE(-40.000), + SIMDE_FLOAT16_VALUE(-39.300), SIMDE_FLOAT16_VALUE(50.000), SIMDE_FLOAT16_VALUE(8.350), SIMDE_FLOAT16_VALUE(-41.950) }, + { SIMDE_FLOAT16_VALUE(-48.970), SIMDE_FLOAT16_VALUE(84.430), SIMDE_FLOAT16_VALUE(-48.530), SIMDE_FLOAT16_VALUE(-54.560), + SIMDE_FLOAT16_VALUE(-54.900), SIMDE_FLOAT16_VALUE(69.240), SIMDE_FLOAT16_VALUE(13.290), SIMDE_FLOAT16_VALUE(-57.290) }, + { SIMDE_FLOAT16_VALUE(54.770), SIMDE_FLOAT16_VALUE(-66.630), SIMDE_FLOAT16_VALUE(66.130), SIMDE_FLOAT16_VALUE(47.360), + SIMDE_FLOAT16_VALUE(54.300), SIMDE_FLOAT16_VALUE(-65.440), SIMDE_FLOAT16_VALUE(-21.290), SIMDE_FLOAT16_VALUE(50.090) } }, + { { SIMDE_FLOAT16_VALUE(3.1), SIMDE_FLOAT16_VALUE(-8.0), SIMDE_FLOAT16_VALUE(-5.6), SIMDE_FLOAT16_VALUE(-2.4), + SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(-7.5), SIMDE_FLOAT16_VALUE(1.2) }, + { 
SIMDE_FLOAT16_VALUE(9.7), SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(-0.5), + SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(3.3) }, + { SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(-4.3), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(1.2), + SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(-2.8), SIMDE_FLOAT16_VALUE(4.8) }, + { SIMDE_FLOAT16_VALUE(74.880), SIMDE_FLOAT16_VALUE(-21.320), SIMDE_FLOAT16_VALUE(49.160), SIMDE_FLOAT16_VALUE(-6.100), + SIMDE_FLOAT16_VALUE(6.760), SIMDE_FLOAT16_VALUE(-20.460), SIMDE_FLOAT16_VALUE(-52.640), SIMDE_FLOAT16_VALUE(25.620) }, + { SIMDE_FLOAT16_VALUE(-38.610), SIMDE_FLOAT16_VALUE(-0.260), SIMDE_FLOAT16_VALUE(-37.420), SIMDE_FLOAT16_VALUE(-0.250), + SIMDE_FLOAT16_VALUE(-9.620), SIMDE_FLOAT16_VALUE(7.620), SIMDE_FLOAT16_VALUE(18.730), SIMDE_FLOAT16_VALUE(-12.990) }, + { SIMDE_FLOAT16_VALUE(91.370), SIMDE_FLOAT16_VALUE(-24.380), SIMDE_FLOAT16_VALUE(61.740), SIMDE_FLOAT16_VALUE(-6.950), + SIMDE_FLOAT16_VALUE(9.140), SIMDE_FLOAT16_VALUE(-24.540), SIMDE_FLOAT16_VALUE(-63.010), SIMDE_FLOAT16_VALUE(31.230) }, + { SIMDE_FLOAT16_VALUE(14.740), SIMDE_FLOAT16_VALUE(-10.160), SIMDE_FLOAT16_VALUE(3.280), SIMDE_FLOAT16_VALUE(-3.000), + SIMDE_FLOAT16_VALUE(-1.920), SIMDE_FLOAT16_VALUE(-5.580), SIMDE_FLOAT16_VALUE(-14.820), SIMDE_FLOAT16_VALUE(5.160) }, + { SIMDE_FLOAT16_VALUE(28.320), SIMDE_FLOAT16_VALUE(-12.680), SIMDE_FLOAT16_VALUE(13.640), SIMDE_FLOAT16_VALUE(-3.700), + SIMDE_FLOAT16_VALUE(0.040), SIMDE_FLOAT16_VALUE(-8.940), SIMDE_FLOAT16_VALUE(-23.360), SIMDE_FLOAT16_VALUE(9.780) }, + { SIMDE_FLOAT16_VALUE(-6.600), SIMDE_FLOAT16_VALUE(-6.200), SIMDE_FLOAT16_VALUE(-13.000), SIMDE_FLOAT16_VALUE(-1.900), + SIMDE_FLOAT16_VALUE(-5.000), SIMDE_FLOAT16_VALUE(-0.300), SIMDE_FLOAT16_VALUE(-1.400), SIMDE_FLOAT16_VALUE(-2.100) }, + { SIMDE_FLOAT16_VALUE(-24.060), SIMDE_FLOAT16_VALUE(-2.960), SIMDE_FLOAT16_VALUE(-26.320), 
SIMDE_FLOAT16_VALUE(-1.000), + SIMDE_FLOAT16_VALUE(-7.520), SIMDE_FLOAT16_VALUE(4.020), SIMDE_FLOAT16_VALUE(9.580), SIMDE_FLOAT16_VALUE(-8.040) }, + { SIMDE_FLOAT16_VALUE(49.660), SIMDE_FLOAT16_VALUE(-16.640), SIMDE_FLOAT16_VALUE(29.920), SIMDE_FLOAT16_VALUE(-4.800), + SIMDE_FLOAT16_VALUE(3.120), SIMDE_FLOAT16_VALUE(-14.220), SIMDE_FLOAT16_VALUE(-36.780), SIMDE_FLOAT16_VALUE(17.040) } }, + { { SIMDE_FLOAT16_VALUE(5.2), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(5.0), + SIMDE_FLOAT16_VALUE(-8.1), SIMDE_FLOAT16_VALUE(-2.2), SIMDE_FLOAT16_VALUE(8.4), SIMDE_FLOAT16_VALUE(-8.1) }, + { SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-8.3), SIMDE_FLOAT16_VALUE(-1.5), + SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(-6.7), SIMDE_FLOAT16_VALUE(8.9) }, + { SIMDE_FLOAT16_VALUE(5.6), SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-5.9), + SIMDE_FLOAT16_VALUE(7.1), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(-4.0), SIMDE_FLOAT16_VALUE(3.3) }, + { SIMDE_FLOAT16_VALUE(56.720), SIMDE_FLOAT16_VALUE(-9.400), SIMDE_FLOAT16_VALUE(-41.580), SIMDE_FLOAT16_VALUE(-3.400), + SIMDE_FLOAT16_VALUE(2.540), SIMDE_FLOAT16_VALUE(-36.360), SIMDE_FLOAT16_VALUE(-29.120), SIMDE_FLOAT16_VALUE(41.740) }, + { SIMDE_FLOAT16_VALUE(-50.920), SIMDE_FLOAT16_VALUE(8.150), SIMDE_FLOAT16_VALUE(55.530), SIMDE_FLOAT16_VALUE(14.150), + SIMDE_FLOAT16_VALUE(-19.690), SIMDE_FLOAT16_VALUE(35.010), SIMDE_FLOAT16_VALUE(49.270), SIMDE_FLOAT16_VALUE(-62.390) }, + { SIMDE_FLOAT16_VALUE(83.400), SIMDE_FLOAT16_VALUE(-13.750), SIMDE_FLOAT16_VALUE(-65.650), SIMDE_FLOAT16_VALUE(-7.750), + SIMDE_FLOAT16_VALUE(8.050), SIMDE_FLOAT16_VALUE(-54.050), SIMDE_FLOAT16_VALUE(-48.550), SIMDE_FLOAT16_VALUE(67.550) }, + { SIMDE_FLOAT16_VALUE(-49.080), SIMDE_FLOAT16_VALUE(7.850), SIMDE_FLOAT16_VALUE(53.870), SIMDE_FLOAT16_VALUE(13.850), + SIMDE_FLOAT16_VALUE(-19.310), SIMDE_FLOAT16_VALUE(33.790), SIMDE_FLOAT16_VALUE(47.930), 
SIMDE_FLOAT16_VALUE(-60.610) }, + { SIMDE_FLOAT16_VALUE(70.520), SIMDE_FLOAT16_VALUE(-11.650), SIMDE_FLOAT16_VALUE(-54.030), SIMDE_FLOAT16_VALUE(-5.650), + SIMDE_FLOAT16_VALUE(5.390), SIMDE_FLOAT16_VALUE(-45.510), SIMDE_FLOAT16_VALUE(-39.170), SIMDE_FLOAT16_VALUE(55.090) }, + { SIMDE_FLOAT16_VALUE(-4.000), SIMDE_FLOAT16_VALUE(0.500), SIMDE_FLOAT16_VALUE(13.200), SIMDE_FLOAT16_VALUE(6.500), + SIMDE_FLOAT16_VALUE(-10.000), SIMDE_FLOAT16_VALUE(3.900), SIMDE_FLOAT16_VALUE(15.100), SIMDE_FLOAT16_VALUE(-17.000) }, + { SIMDE_FLOAT16_VALUE(-31.600), SIMDE_FLOAT16_VALUE(5.000), SIMDE_FLOAT16_VALUE(38.100), SIMDE_FLOAT16_VALUE(11.000), + SIMDE_FLOAT16_VALUE(-15.700), SIMDE_FLOAT16_VALUE(22.200), SIMDE_FLOAT16_VALUE(35.200), SIMDE_FLOAT16_VALUE(-43.700) }, + { SIMDE_FLOAT16_VALUE(35.560), SIMDE_FLOAT16_VALUE(-5.950), SIMDE_FLOAT16_VALUE(-22.490), SIMDE_FLOAT16_VALUE(0.050), + SIMDE_FLOAT16_VALUE(-1.830), SIMDE_FLOAT16_VALUE(-22.330), SIMDE_FLOAT16_VALUE(-13.710), SIMDE_FLOAT16_VALUE(21.270) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t v = simde_vld1q_f16(test_vec[i].v); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r0 = simde_vfmaq_laneq_f16(a, b, v, 0); + simde_float16x8_t r1 = simde_vfmaq_laneq_f16(a, b, v, 1); + simde_float16x8_t r2 = simde_vfmaq_laneq_f16(a, b, v, 2); + simde_float16x8_t r3 = simde_vfmaq_laneq_f16(a, b, v, 3); + simde_float16x8_t r4 = simde_vfmaq_laneq_f16(a, b, v, 4); + simde_float16x8_t r5 = simde_vfmaq_laneq_f16(a, b, v, 5); + simde_float16x8_t r6 = simde_vfmaq_laneq_f16(a, b, v, 6); + simde_float16x8_t r7 = simde_vfmaq_laneq_f16(a, b, v, 7); + simde_test_arm_neon_assert_equal_f16x8(r0, simde_vld1q_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x8(r1, simde_vld1q_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x8(r2, simde_vld1q_f16(test_vec[i].r2), 1); + 
simde_test_arm_neon_assert_equal_f16x8(r3, simde_vld1q_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x8(r4, simde_vld1q_f16(test_vec[i].r4), 1); + simde_test_arm_neon_assert_equal_f16x8(r5, simde_vld1q_f16(test_vec[i].r5), 1); + simde_test_arm_neon_assert_equal_f16x8(r6, simde_vld1q_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x8(r7, simde_vld1q_f16(test_vec[i].r7), 1); + } + + return 0; +} + static int test_simde_vfmaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1023,15 +1601,22 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vfmad_laneq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vfmas_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfmas_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmah_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmah_laneq_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfma_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_lane_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vfma_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_laneq_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_lane_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_laneq_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/fma_n.c b/test/arm/neon/fma_n.c index 063b7eaa6..8d45763bc 100644 --- a/test/arm/neon/fma_n.c +++ b/test/arm/neon/fma_n.c @@ -3,6 +3,125 @@ #include "test-neon.h" #include "../../../simde/arm/neon/fma_n.h" +static int +test_simde_vfma_n_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 c; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(3.1), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(-11.4), SIMDE_FLOAT16_VALUE(4.0) }, + { SIMDE_FLOAT16_VALUE(12.9), SIMDE_FLOAT16_VALUE(-2.2), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(-0.6) }, + SIMDE_FLOAT16_VALUE(-2.9), + { SIMDE_FLOAT16_VALUE(-34.310), 
SIMDE_FLOAT16_VALUE(6.880), SIMDE_FLOAT16_VALUE(-29.960), SIMDE_FLOAT16_VALUE(5.740) } }, + { { SIMDE_FLOAT16_VALUE(11.7), SIMDE_FLOAT16_VALUE(6.0), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(-10.4) }, + { SIMDE_FLOAT16_VALUE(14.7), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(-13.1), SIMDE_FLOAT16_VALUE(13.6) }, + SIMDE_FLOAT16_VALUE(2.3), + { SIMDE_FLOAT16_VALUE(45.510), SIMDE_FLOAT16_VALUE(-14.470), SIMDE_FLOAT16_VALUE(-29.830), SIMDE_FLOAT16_VALUE(20.880) } }, + { { SIMDE_FLOAT16_VALUE(10.9), SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(8.1), SIMDE_FLOAT16_VALUE(-1.3) }, + { SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(4.7), SIMDE_FLOAT16_VALUE(10.2), SIMDE_FLOAT16_VALUE(-10.8) }, + SIMDE_FLOAT16_VALUE(-3.9), + { SIMDE_FLOAT16_VALUE(18.310), SIMDE_FLOAT16_VALUE(-26.930), SIMDE_FLOAT16_VALUE(-31.680), SIMDE_FLOAT16_VALUE(40.820) } }, + { { SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(9.7), SIMDE_FLOAT16_VALUE(-8.4), SIMDE_FLOAT16_VALUE(12.4) }, + { SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(-14.7) }, + SIMDE_FLOAT16_VALUE(3.6), + { SIMDE_FLOAT16_VALUE(-6.980), SIMDE_FLOAT16_VALUE(13.660), SIMDE_FLOAT16_VALUE(26.880), SIMDE_FLOAT16_VALUE(-40.520) } }, + { { SIMDE_FLOAT16_VALUE(-9.0), SIMDE_FLOAT16_VALUE(-8.0), SIMDE_FLOAT16_VALUE(-14.7), SIMDE_FLOAT16_VALUE(-9.6) }, + { SIMDE_FLOAT16_VALUE(13.2), SIMDE_FLOAT16_VALUE(-5.4), SIMDE_FLOAT16_VALUE(3.0), SIMDE_FLOAT16_VALUE(-12.0) }, + SIMDE_FLOAT16_VALUE(-4.9), + { SIMDE_FLOAT16_VALUE(-73.680), SIMDE_FLOAT16_VALUE(18.460), SIMDE_FLOAT16_VALUE(-29.400), SIMDE_FLOAT16_VALUE(49.200) } }, + { { SIMDE_FLOAT16_VALUE(-12.0), SIMDE_FLOAT16_VALUE(-9.3), SIMDE_FLOAT16_VALUE(-12.7), SIMDE_FLOAT16_VALUE(4.8) }, + { SIMDE_FLOAT16_VALUE(13.8), SIMDE_FLOAT16_VALUE(7.0), SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(-14.4) }, + SIMDE_FLOAT16_VALUE(-1.3), + { SIMDE_FLOAT16_VALUE(-29.940), SIMDE_FLOAT16_VALUE(-18.400), SIMDE_FLOAT16_VALUE(-4.770), 
SIMDE_FLOAT16_VALUE(23.520) } }, + { { SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(11.2), SIMDE_FLOAT16_VALUE(3.6), SIMDE_FLOAT16_VALUE(-1.8) }, + { SIMDE_FLOAT16_VALUE(-3.0), SIMDE_FLOAT16_VALUE(-8.8), SIMDE_FLOAT16_VALUE(-10.4), SIMDE_FLOAT16_VALUE(-5.9) }, + SIMDE_FLOAT16_VALUE(4.3), + { SIMDE_FLOAT16_VALUE(-27.100), SIMDE_FLOAT16_VALUE(-26.640), SIMDE_FLOAT16_VALUE(-41.120), SIMDE_FLOAT16_VALUE(-27.170) } }, + { { SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-15.0) }, + { SIMDE_FLOAT16_VALUE(-7.2), SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(6.6), SIMDE_FLOAT16_VALUE(1.3) }, + SIMDE_FLOAT16_VALUE(-1.3), + { SIMDE_FLOAT16_VALUE(17.860), SIMDE_FLOAT16_VALUE(2.850), SIMDE_FLOAT16_VALUE(-20.680), SIMDE_FLOAT16_VALUE(-16.690) } }, + { { SIMDE_FLOAT16_VALUE(-8.0), SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(-0.1) }, + { SIMDE_FLOAT16_VALUE(-3.5), SIMDE_FLOAT16_VALUE(13.9), SIMDE_FLOAT16_VALUE(13.8), SIMDE_FLOAT16_VALUE(-2.1) }, + SIMDE_FLOAT16_VALUE(4.8), + { SIMDE_FLOAT16_VALUE(-24.800), SIMDE_FLOAT16_VALUE(55.220), SIMDE_FLOAT16_VALUE(68.840), SIMDE_FLOAT16_VALUE(-10.180) } }, + { { SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(10.1), SIMDE_FLOAT16_VALUE(6.4) }, + { SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-0.2) }, + SIMDE_FLOAT16_VALUE(-4.9), + { SIMDE_FLOAT16_VALUE(-4.500), SIMDE_FLOAT16_VALUE(-42.180), SIMDE_FLOAT16_VALUE(0.300), SIMDE_FLOAT16_VALUE(7.380) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16 c = test_vec[i].c; + simde_float16x4_t r = simde_vfma_n_f16(a, b, c); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmaq_n_f16 
(SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 c; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-13.5), SIMDE_FLOAT16_VALUE(11.7), + SIMDE_FLOAT16_VALUE(-0.3), SIMDE_FLOAT16_VALUE(-14.6), SIMDE_FLOAT16_VALUE(-13.6), SIMDE_FLOAT16_VALUE(-2.7) }, + { SIMDE_FLOAT16_VALUE(-12.8), SIMDE_FLOAT16_VALUE(11.5), SIMDE_FLOAT16_VALUE(-14.3), SIMDE_FLOAT16_VALUE(-7.9), + SIMDE_FLOAT16_VALUE(-10.5), SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(6.7), SIMDE_FLOAT16_VALUE(-7.5) }, + SIMDE_FLOAT16_VALUE(1.9), + { SIMDE_FLOAT16_VALUE(-27.520), SIMDE_FLOAT16_VALUE(20.350), SIMDE_FLOAT16_VALUE(-40.670), SIMDE_FLOAT16_VALUE(-3.310), + SIMDE_FLOAT16_VALUE(-20.250), SIMDE_FLOAT16_VALUE(-6.810), SIMDE_FLOAT16_VALUE(-0.870), SIMDE_FLOAT16_VALUE(-16.950) } }, + { { SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(-10.5), SIMDE_FLOAT16_VALUE(-3.1), + SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(12.1), SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(10.9) }, + { SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(5.9), SIMDE_FLOAT16_VALUE(-3.2), + SIMDE_FLOAT16_VALUE(4.7), SIMDE_FLOAT16_VALUE(-7.6), SIMDE_FLOAT16_VALUE(-10.1), SIMDE_FLOAT16_VALUE(-14.5) }, + SIMDE_FLOAT16_VALUE(0.4), + { SIMDE_FLOAT16_VALUE(0.020), SIMDE_FLOAT16_VALUE(13.440), SIMDE_FLOAT16_VALUE(-8.140), SIMDE_FLOAT16_VALUE(-4.380), + SIMDE_FLOAT16_VALUE(-3.920), SIMDE_FLOAT16_VALUE(9.060), SIMDE_FLOAT16_VALUE(-13.640), SIMDE_FLOAT16_VALUE(5.100) } }, + { { SIMDE_FLOAT16_VALUE(-9.9), SIMDE_FLOAT16_VALUE(2.2), SIMDE_FLOAT16_VALUE(-4.9), SIMDE_FLOAT16_VALUE(-8.4), + SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(10.2), SIMDE_FLOAT16_VALUE(-4.7), SIMDE_FLOAT16_VALUE(-4.7) }, + { SIMDE_FLOAT16_VALUE(11.5), SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-7.6), SIMDE_FLOAT16_VALUE(-5.1), + SIMDE_FLOAT16_VALUE(11.6), SIMDE_FLOAT16_VALUE(-0.3), 
SIMDE_FLOAT16_VALUE(-12.7), SIMDE_FLOAT16_VALUE(2.7) }, + SIMDE_FLOAT16_VALUE(-1.3), + { SIMDE_FLOAT16_VALUE(-24.850), SIMDE_FLOAT16_VALUE(-8.850), SIMDE_FLOAT16_VALUE(4.980), SIMDE_FLOAT16_VALUE(-1.770), + SIMDE_FLOAT16_VALUE(-26.580), SIMDE_FLOAT16_VALUE(10.590), SIMDE_FLOAT16_VALUE(11.810), SIMDE_FLOAT16_VALUE(-8.210) } }, + { { SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-12.3), SIMDE_FLOAT16_VALUE(4.1), + SIMDE_FLOAT16_VALUE(-13.3), SIMDE_FLOAT16_VALUE(-14.4), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(3.6) }, + { SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(13.3), SIMDE_FLOAT16_VALUE(7.0), SIMDE_FLOAT16_VALUE(13.2), + SIMDE_FLOAT16_VALUE(-0.6), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(14.4), SIMDE_FLOAT16_VALUE(9.6) }, + SIMDE_FLOAT16_VALUE(1.5), + { SIMDE_FLOAT16_VALUE(8.600), SIMDE_FLOAT16_VALUE(8.650), SIMDE_FLOAT16_VALUE(-1.800), SIMDE_FLOAT16_VALUE(23.900), + SIMDE_FLOAT16_VALUE(-14.200), SIMDE_FLOAT16_VALUE(-19.800), SIMDE_FLOAT16_VALUE(26.400), SIMDE_FLOAT16_VALUE(18.000) } }, + { { SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(11.4), SIMDE_FLOAT16_VALUE(-3.5), SIMDE_FLOAT16_VALUE(6.4), + SIMDE_FLOAT16_VALUE(-13.2), SIMDE_FLOAT16_VALUE(9.6), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(-0.6) }, + { SIMDE_FLOAT16_VALUE(-10.6), SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(-10.0), SIMDE_FLOAT16_VALUE(-6.4), + SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-7.0) }, + SIMDE_FLOAT16_VALUE(-4.1), + { SIMDE_FLOAT16_VALUE(38.960), SIMDE_FLOAT16_VALUE(-26.320), SIMDE_FLOAT16_VALUE(37.500), SIMDE_FLOAT16_VALUE(32.640), + SIMDE_FLOAT16_VALUE(-46.820), SIMDE_FLOAT16_VALUE(-30.580), SIMDE_FLOAT16_VALUE(-10.540), SIMDE_FLOAT16_VALUE(28.100) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16 c = test_vec[i].c; + 
simde_float16x8_t r = simde_vfmaq_n_f16(a, b, c); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vfma_n_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -283,9 +402,11 @@ test_simde_vfmaq_n_f64 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vfma_n_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfma_n_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_n_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vfmaq_n_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/fms.c b/test/arm/neon/fms.c new file mode 100644 index 000000000..28df9a475 --- /dev/null +++ b/test/arm/neon/fms.c @@ -0,0 +1,322 @@ +#define SIMDE_TEST_ARM_NEON_INSN fms + +#include "test-neon.h" +#include "../../../simde/arm/neon/fms.h" + +static int +test_simde_vfms_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[2]; + simde_float32 b[2]; + simde_float32 c[2]; + simde_float32 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-6.859), SIMDE_FLOAT32_C(-4.171) }, + { SIMDE_FLOAT32_C(62.983), SIMDE_FLOAT32_C(36.473) }, + { SIMDE_FLOAT32_C(46.440), SIMDE_FLOAT32_C(-44.437) }, + { SIMDE_FLOAT32_C(-2931.744), SIMDE_FLOAT32_C(1616.580) } }, + { { SIMDE_FLOAT32_C(-78.537), SIMDE_FLOAT32_C(-0.301) }, + { SIMDE_FLOAT32_C(28.907), SIMDE_FLOAT32_C(91.206) }, + { SIMDE_FLOAT32_C(74.886), SIMDE_FLOAT32_C(53.243) }, + { SIMDE_FLOAT32_C(-2243.273), SIMDE_FLOAT32_C(-4856.334) } }, + { { SIMDE_FLOAT32_C(-66.494), SIMDE_FLOAT32_C(18.998) }, + { SIMDE_FLOAT32_C(-6.524), SIMDE_FLOAT32_C(-37.838) }, + { SIMDE_FLOAT32_C(40.686), SIMDE_FLOAT32_C(39.559) }, + { SIMDE_FLOAT32_C(198.930), SIMDE_FLOAT32_C(1515.812) } }, + { { SIMDE_FLOAT32_C(84.990), SIMDE_FLOAT32_C(-13.936) }, + { SIMDE_FLOAT32_C(-67.575), SIMDE_FLOAT32_C(66.105) }, + { SIMDE_FLOAT32_C(-21.256), SIMDE_FLOAT32_C(-47.685) }, + { SIMDE_FLOAT32_C(-1351.393), SIMDE_FLOAT32_C(3138.295) } }, + { { 
SIMDE_FLOAT32_C(-35.378), SIMDE_FLOAT32_C(99.361) }, + { SIMDE_FLOAT32_C(14.103), SIMDE_FLOAT32_C(-22.877) }, + { SIMDE_FLOAT32_C(85.253), SIMDE_FLOAT32_C(-64.930) }, + { SIMDE_FLOAT32_C(-1237.693), SIMDE_FLOAT32_C(-1386.043) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t c = simde_vld1_f32(test_vec[i].c); + simde_float32x2_t r = simde_vfms_f32(a, b, c); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmsh_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[1]; + simde_float16 b[1]; + simde_float16 c[1]; + simde_float16 r[1]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(0.09) }, + { SIMDE_FLOAT16_VALUE(6.34) }, + { SIMDE_FLOAT16_VALUE(7.45) }, + { SIMDE_FLOAT16_VALUE(-47.15) } }, + { { SIMDE_FLOAT16_VALUE(0.91) }, + { SIMDE_FLOAT16_VALUE(6.33) }, + { SIMDE_FLOAT16_VALUE(-6.82) }, + { SIMDE_FLOAT16_VALUE(44.04) } }, + { { SIMDE_FLOAT16_VALUE(4.06) }, + { SIMDE_FLOAT16_VALUE(8.76) }, + { SIMDE_FLOAT16_VALUE(-6.22) }, + { SIMDE_FLOAT16_VALUE(58.53) } }, + { { SIMDE_FLOAT16_VALUE(-2.60) }, + { SIMDE_FLOAT16_VALUE(-7.57) }, + { SIMDE_FLOAT16_VALUE(2.23) }, + { SIMDE_FLOAT16_VALUE(14.28) } }, + { { SIMDE_FLOAT16_VALUE(2.38) }, + { SIMDE_FLOAT16_VALUE(6.31) }, + { SIMDE_FLOAT16_VALUE(-4.22) }, + { SIMDE_FLOAT16_VALUE(29.02) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t r = simde_vfmsh_f16(test_vec[i].a[0], test_vec[i].b[0], test_vec[i].c[0]); + simde_assert_equal_f16(r, test_vec[i].r[0], 1); + } + + return 0; +} + +static int +test_simde_vfms_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 c[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-5.06), SIMDE_FLOAT16_VALUE(1.00), 
SIMDE_FLOAT16_VALUE(-5.98), SIMDE_FLOAT16_VALUE(-5.93) }, + { SIMDE_FLOAT16_VALUE(7.00), SIMDE_FLOAT16_VALUE(-6.45), SIMDE_FLOAT16_VALUE(4.77), SIMDE_FLOAT16_VALUE(6.04) }, + { SIMDE_FLOAT16_VALUE(-5.47), SIMDE_FLOAT16_VALUE(-8.56), SIMDE_FLOAT16_VALUE(-2.72), SIMDE_FLOAT16_VALUE(-3.45) }, + { SIMDE_FLOAT16_VALUE(33.22), SIMDE_FLOAT16_VALUE(-54.18), SIMDE_FLOAT16_VALUE(7.00), SIMDE_FLOAT16_VALUE(14.92) } }, + { { SIMDE_FLOAT16_VALUE(7.48), SIMDE_FLOAT16_VALUE(2.81), SIMDE_FLOAT16_VALUE(-8.73), SIMDE_FLOAT16_VALUE(-0.41) }, + { SIMDE_FLOAT16_VALUE(5.51), SIMDE_FLOAT16_VALUE(7.18), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(9.89) }, + { SIMDE_FLOAT16_VALUE(7.61), SIMDE_FLOAT16_VALUE(9.16), SIMDE_FLOAT16_VALUE(-7.10), SIMDE_FLOAT16_VALUE(7.09) }, + { SIMDE_FLOAT16_VALUE(-34.39), SIMDE_FLOAT16_VALUE(-62.99), SIMDE_FLOAT16_VALUE(26.09), SIMDE_FLOAT16_VALUE(-70.56) } }, + { { SIMDE_FLOAT16_VALUE(-4.12), SIMDE_FLOAT16_VALUE(-5.55), SIMDE_FLOAT16_VALUE(-2.55), SIMDE_FLOAT16_VALUE(2.44) }, + { SIMDE_FLOAT16_VALUE(7.30), SIMDE_FLOAT16_VALUE(-1.85), SIMDE_FLOAT16_VALUE(2.75), SIMDE_FLOAT16_VALUE(-7.74) }, + { SIMDE_FLOAT16_VALUE(-6.85), SIMDE_FLOAT16_VALUE(5.38), SIMDE_FLOAT16_VALUE(6.40), SIMDE_FLOAT16_VALUE(2.82) }, + { SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(4.42), SIMDE_FLOAT16_VALUE(-20.12), SIMDE_FLOAT16_VALUE(24.26) } }, + { { SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(-0.89), SIMDE_FLOAT16_VALUE(-5.24), SIMDE_FLOAT16_VALUE(0.14) }, + { SIMDE_FLOAT16_VALUE(2.33), SIMDE_FLOAT16_VALUE(8.55), SIMDE_FLOAT16_VALUE(-9.60), SIMDE_FLOAT16_VALUE(0.89) }, + { SIMDE_FLOAT16_VALUE(6.42), SIMDE_FLOAT16_VALUE(0.94), SIMDE_FLOAT16_VALUE(2.34), SIMDE_FLOAT16_VALUE(-3.48) }, + { SIMDE_FLOAT16_VALUE(-15.54), SIMDE_FLOAT16_VALUE(-8.94), SIMDE_FLOAT16_VALUE(17.21), SIMDE_FLOAT16_VALUE(3.23) } }, + { { SIMDE_FLOAT16_VALUE(-3.15), SIMDE_FLOAT16_VALUE(-9.19), SIMDE_FLOAT16_VALUE(8.90), SIMDE_FLOAT16_VALUE(-6.16) }, + { SIMDE_FLOAT16_VALUE(-0.15), SIMDE_FLOAT16_VALUE(-8.15), 
SIMDE_FLOAT16_VALUE(7.12), SIMDE_FLOAT16_VALUE(6.74) }, + { SIMDE_FLOAT16_VALUE(-7.04), SIMDE_FLOAT16_VALUE(-8.06), SIMDE_FLOAT16_VALUE(-4.59), SIMDE_FLOAT16_VALUE(-9.11) }, + { SIMDE_FLOAT16_VALUE(-4.23), SIMDE_FLOAT16_VALUE(-74.82), SIMDE_FLOAT16_VALUE(41.59), SIMDE_FLOAT16_VALUE(55.24) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t c = simde_vld1_f16(test_vec[i].c); + simde_float16x4_t r = simde_vfms_f16(a, b, c); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 c[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-8.08), SIMDE_FLOAT16_VALUE(5.53), SIMDE_FLOAT16_VALUE(9.08), SIMDE_FLOAT16_VALUE(-8.83), + SIMDE_FLOAT16_VALUE(-8.48), SIMDE_FLOAT16_VALUE(-0.88), SIMDE_FLOAT16_VALUE(0.79), SIMDE_FLOAT16_VALUE(2.97) }, + { SIMDE_FLOAT16_VALUE(-1.45), SIMDE_FLOAT16_VALUE(-0.39), SIMDE_FLOAT16_VALUE(-0.77), SIMDE_FLOAT16_VALUE(1.96), + SIMDE_FLOAT16_VALUE(1.24), SIMDE_FLOAT16_VALUE(0.79), SIMDE_FLOAT16_VALUE(-2.41), SIMDE_FLOAT16_VALUE(-5.27) }, + { SIMDE_FLOAT16_VALUE(-7.34), SIMDE_FLOAT16_VALUE(0.74), SIMDE_FLOAT16_VALUE(-6.18), SIMDE_FLOAT16_VALUE(2.56), + SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(0.82), SIMDE_FLOAT16_VALUE(-3.39), SIMDE_FLOAT16_VALUE(8.24) }, + { SIMDE_FLOAT16_VALUE(-18.76), SIMDE_FLOAT16_VALUE(5.82), SIMDE_FLOAT16_VALUE(4.33), SIMDE_FLOAT16_VALUE(-13.84), + SIMDE_FLOAT16_VALUE(-12.20), SIMDE_FLOAT16_VALUE(-1.53), SIMDE_FLOAT16_VALUE(-7.39), SIMDE_FLOAT16_VALUE(46.36) } }, + { { SIMDE_FLOAT16_VALUE(-2.70), SIMDE_FLOAT16_VALUE(-1.01), SIMDE_FLOAT16_VALUE(-6.23), SIMDE_FLOAT16_VALUE(-2.18), + SIMDE_FLOAT16_VALUE(-4.69), SIMDE_FLOAT16_VALUE(0.78), SIMDE_FLOAT16_VALUE(4.44), 
SIMDE_FLOAT16_VALUE(-6.66) }, + { SIMDE_FLOAT16_VALUE(-0.03), SIMDE_FLOAT16_VALUE(4.35), SIMDE_FLOAT16_VALUE(6.19), SIMDE_FLOAT16_VALUE(7.68), + SIMDE_FLOAT16_VALUE(1.64), SIMDE_FLOAT16_VALUE(-9.10), SIMDE_FLOAT16_VALUE(8.87), SIMDE_FLOAT16_VALUE(-7.30) }, + { SIMDE_FLOAT16_VALUE(2.90), SIMDE_FLOAT16_VALUE(2.90), SIMDE_FLOAT16_VALUE(5.52), SIMDE_FLOAT16_VALUE(-9.62), + SIMDE_FLOAT16_VALUE(7.96), SIMDE_FLOAT16_VALUE(8.39), SIMDE_FLOAT16_VALUE(-3.30), SIMDE_FLOAT16_VALUE(-3.74) }, + { SIMDE_FLOAT16_VALUE(-2.62), SIMDE_FLOAT16_VALUE(-13.61), SIMDE_FLOAT16_VALUE(-40.39), SIMDE_FLOAT16_VALUE(71.75), + SIMDE_FLOAT16_VALUE(-17.72), SIMDE_FLOAT16_VALUE(77.16), SIMDE_FLOAT16_VALUE(33.68), SIMDE_FLOAT16_VALUE(-33.95) } }, + { { SIMDE_FLOAT16_VALUE(-5.37), SIMDE_FLOAT16_VALUE(-1.73), SIMDE_FLOAT16_VALUE(-2.07), SIMDE_FLOAT16_VALUE(-6.97), + SIMDE_FLOAT16_VALUE(9.13), SIMDE_FLOAT16_VALUE(4.52), SIMDE_FLOAT16_VALUE(-4.05), SIMDE_FLOAT16_VALUE(-1.56) }, + { SIMDE_FLOAT16_VALUE(-9.20), SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(-0.34), SIMDE_FLOAT16_VALUE(2.13), + SIMDE_FLOAT16_VALUE(7.25), SIMDE_FLOAT16_VALUE(3.52), SIMDE_FLOAT16_VALUE(1.71), SIMDE_FLOAT16_VALUE(0.27) }, + { SIMDE_FLOAT16_VALUE(-6.37), SIMDE_FLOAT16_VALUE(-0.00), SIMDE_FLOAT16_VALUE(7.48), SIMDE_FLOAT16_VALUE(4.49), + SIMDE_FLOAT16_VALUE(-5.38), SIMDE_FLOAT16_VALUE(2.16), SIMDE_FLOAT16_VALUE(1.77), SIMDE_FLOAT16_VALUE(-5.79) }, + { SIMDE_FLOAT16_VALUE(-63.95), SIMDE_FLOAT16_VALUE(-1.72), SIMDE_FLOAT16_VALUE(0.51), SIMDE_FLOAT16_VALUE(-16.53), + SIMDE_FLOAT16_VALUE(48.16), SIMDE_FLOAT16_VALUE(-3.07), SIMDE_FLOAT16_VALUE(-7.08), SIMDE_FLOAT16_VALUE(-0.00) } }, + { { SIMDE_FLOAT16_VALUE(1.08), SIMDE_FLOAT16_VALUE(-6.76), SIMDE_FLOAT16_VALUE(-1.98), SIMDE_FLOAT16_VALUE(-7.78), + SIMDE_FLOAT16_VALUE(8.11), SIMDE_FLOAT16_VALUE(9.14), SIMDE_FLOAT16_VALUE(3.44), SIMDE_FLOAT16_VALUE(-8.74) }, + { SIMDE_FLOAT16_VALUE(-5.53), SIMDE_FLOAT16_VALUE(-7.57), SIMDE_FLOAT16_VALUE(6.59), SIMDE_FLOAT16_VALUE(8.55), + 
SIMDE_FLOAT16_VALUE(7.31), SIMDE_FLOAT16_VALUE(6.04), SIMDE_FLOAT16_VALUE(-3.28), SIMDE_FLOAT16_VALUE(-9.75) }, + { SIMDE_FLOAT16_VALUE(-8.87), SIMDE_FLOAT16_VALUE(2.67), SIMDE_FLOAT16_VALUE(-3.31), SIMDE_FLOAT16_VALUE(5.58), + SIMDE_FLOAT16_VALUE(1.88), SIMDE_FLOAT16_VALUE(9.00), SIMDE_FLOAT16_VALUE(4.85), SIMDE_FLOAT16_VALUE(1.27) }, + { SIMDE_FLOAT16_VALUE(-47.96), SIMDE_FLOAT16_VALUE(13.46), SIMDE_FLOAT16_VALUE(19.84), SIMDE_FLOAT16_VALUE(-55.49), + SIMDE_FLOAT16_VALUE(-5.59), SIMDE_FLOAT16_VALUE(-45.27), SIMDE_FLOAT16_VALUE(19.34), SIMDE_FLOAT16_VALUE(3.61) } }, + { { SIMDE_FLOAT16_VALUE(5.70), SIMDE_FLOAT16_VALUE(-1.13), SIMDE_FLOAT16_VALUE(-7.16), SIMDE_FLOAT16_VALUE(8.89), + SIMDE_FLOAT16_VALUE(-2.15), SIMDE_FLOAT16_VALUE(7.87), SIMDE_FLOAT16_VALUE(1.57), SIMDE_FLOAT16_VALUE(-3.28) }, + { SIMDE_FLOAT16_VALUE(4.75), SIMDE_FLOAT16_VALUE(-3.18), SIMDE_FLOAT16_VALUE(-3.52), SIMDE_FLOAT16_VALUE(-3.92), + SIMDE_FLOAT16_VALUE(1.52), SIMDE_FLOAT16_VALUE(3.24), SIMDE_FLOAT16_VALUE(-0.93), SIMDE_FLOAT16_VALUE(-5.49) }, + { SIMDE_FLOAT16_VALUE(3.74), SIMDE_FLOAT16_VALUE(-4.26), SIMDE_FLOAT16_VALUE(-5.61), SIMDE_FLOAT16_VALUE(0.73), + SIMDE_FLOAT16_VALUE(6.19), SIMDE_FLOAT16_VALUE(1.82), SIMDE_FLOAT16_VALUE(-6.11), SIMDE_FLOAT16_VALUE(-2.53) }, + { SIMDE_FLOAT16_VALUE(-12.05), SIMDE_FLOAT16_VALUE(-14.68), SIMDE_FLOAT16_VALUE(-26.86), SIMDE_FLOAT16_VALUE(11.75), + SIMDE_FLOAT16_VALUE(-11.58), SIMDE_FLOAT16_VALUE(1.96), SIMDE_FLOAT16_VALUE(-4.12), SIMDE_FLOAT16_VALUE(-17.16) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t c = simde_vld1q_f16(test_vec[i].c); + simde_float16x8_t r = simde_vfmsq_f16(a, b, c); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + 
simde_float32 a[4]; + simde_float32 b[4]; + simde_float32 c[4]; + simde_float32 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(11.728), SIMDE_FLOAT32_C(-3.160), SIMDE_FLOAT32_C(-73.675), SIMDE_FLOAT32_C(-55.160) }, + { SIMDE_FLOAT32_C(-79.001), SIMDE_FLOAT32_C(33.513), SIMDE_FLOAT32_C(-38.214), SIMDE_FLOAT32_C(25.761) }, + { SIMDE_FLOAT32_C(-59.372), SIMDE_FLOAT32_C(-98.050), SIMDE_FLOAT32_C(-4.140), SIMDE_FLOAT32_C(49.699) }, + { SIMDE_FLOAT32_C(-4678.761), SIMDE_FLOAT32_C(3282.793), SIMDE_FLOAT32_C(-231.866), SIMDE_FLOAT32_C(-1335.459) } }, + { { SIMDE_FLOAT32_C(-56.348), SIMDE_FLOAT32_C(85.125), SIMDE_FLOAT32_C(-53.766), SIMDE_FLOAT32_C(79.650) }, + { SIMDE_FLOAT32_C(87.579), SIMDE_FLOAT32_C(82.774), SIMDE_FLOAT32_C(13.728), SIMDE_FLOAT32_C(-4.889) }, + { SIMDE_FLOAT32_C(53.559), SIMDE_FLOAT32_C(-65.719), SIMDE_FLOAT32_C(-20.103), SIMDE_FLOAT32_C(-26.465) }, + { SIMDE_FLOAT32_C(-4747.023), SIMDE_FLOAT32_C(5524.965), SIMDE_FLOAT32_C(222.204), SIMDE_FLOAT32_C(-49.727) } }, + { { SIMDE_FLOAT32_C(30.720), SIMDE_FLOAT32_C(-31.318), SIMDE_FLOAT32_C(46.467), SIMDE_FLOAT32_C(41.647) }, + { SIMDE_FLOAT32_C(73.255), SIMDE_FLOAT32_C(6.717), SIMDE_FLOAT32_C(68.941), SIMDE_FLOAT32_C(63.059) }, + { SIMDE_FLOAT32_C(-40.771), SIMDE_FLOAT32_C(56.428), SIMDE_FLOAT32_C(76.311), SIMDE_FLOAT32_C(72.002) }, + { SIMDE_FLOAT32_C(3017.407), SIMDE_FLOAT32_C(-410.370), SIMDE_FLOAT32_C(-5214.498), SIMDE_FLOAT32_C(-4498.696) } }, + { { SIMDE_FLOAT32_C(96.006), SIMDE_FLOAT32_C(-59.005), SIMDE_FLOAT32_C(49.375), SIMDE_FLOAT32_C(23.831) }, + { SIMDE_FLOAT32_C(-2.415), SIMDE_FLOAT32_C(37.724), SIMDE_FLOAT32_C(-54.331), SIMDE_FLOAT32_C(-37.370) }, + { SIMDE_FLOAT32_C(27.351), SIMDE_FLOAT32_C(35.284), SIMDE_FLOAT32_C(-94.174), SIMDE_FLOAT32_C(-9.934) }, + { SIMDE_FLOAT32_C(162.061), SIMDE_FLOAT32_C(-1390.057), SIMDE_FLOAT32_C(-5067.154), SIMDE_FLOAT32_C(-347.416) } }, + { { SIMDE_FLOAT32_C(35.075), SIMDE_FLOAT32_C(98.215), SIMDE_FLOAT32_C(-67.862), SIMDE_FLOAT32_C(-17.767) }, + { 
SIMDE_FLOAT32_C(34.968), SIMDE_FLOAT32_C(-2.111), SIMDE_FLOAT32_C(26.986), SIMDE_FLOAT32_C(-48.323) }, + { SIMDE_FLOAT32_C(-44.816), SIMDE_FLOAT32_C(41.750), SIMDE_FLOAT32_C(64.936), SIMDE_FLOAT32_C(-56.652) }, + { SIMDE_FLOAT32_C(1602.228), SIMDE_FLOAT32_C(186.366), SIMDE_FLOAT32_C(-1820.231), SIMDE_FLOAT32_C(-2755.357) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t c = simde_vld1q_f32(test_vec[i].c); + simde_float32x4_t r = simde_vfmsq_f32(a, b, c); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfms_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[1]; + simde_float64 b[1]; + simde_float64 c[1]; + simde_float64 r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-22.5837) }, + { SIMDE_FLOAT64_C(25.1395) }, + { SIMDE_FLOAT64_C(28.0482) }, + { SIMDE_FLOAT64_C(-727.6996) } }, + { { SIMDE_FLOAT64_C(-8.8549) }, + { SIMDE_FLOAT64_C(32.1518) }, + { SIMDE_FLOAT64_C(-67.5638) }, + { SIMDE_FLOAT64_C(2163.4474) } }, + { { SIMDE_FLOAT64_C(11.2151) }, + { SIMDE_FLOAT64_C(-70.5681) }, + { SIMDE_FLOAT64_C(17.0999) }, + { SIMDE_FLOAT64_C(1217.9242) } }, + { { SIMDE_FLOAT64_C(-38.9212) }, + { SIMDE_FLOAT64_C(-3.1983) }, + { SIMDE_FLOAT64_C(83.1114) }, + { SIMDE_FLOAT64_C(226.8946) } }, + { { SIMDE_FLOAT64_C(45.0481) }, + { SIMDE_FLOAT64_C(77.7681) }, + { SIMDE_FLOAT64_C(23.3105) }, + { SIMDE_FLOAT64_C(-1767.7640) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + simde_float64x1_t c = simde_vld1_f64(test_vec[i].c); + simde_float64x1_t r = simde_vfms_f64(a, b, c); + + simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); + } + + return 0; +} + 
+static int +test_simde_vfmsq_f64 (SIMDE_MUNIT_TEST_ARGS) { /* Each vector checks r == a - b * c per lane (consistent with the lane-0 data); lane 1 uses exactly representable values so it is no longer left as the implicit all-zero default. */ + static const struct { + simde_float64 a[2]; + simde_float64 b[2]; + simde_float64 c[2]; + simde_float64 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(22.1439), SIMDE_FLOAT64_C(1.5000) }, + { SIMDE_FLOAT64_C(-768.2538), SIMDE_FLOAT64_C(2.0000) }, + { SIMDE_FLOAT64_C(-776.1350), SIMDE_FLOAT64_C(3.0000) }, + { SIMDE_FLOAT64_C(-596246.4890), SIMDE_FLOAT64_C(-4.5000) } }, + { { SIMDE_FLOAT64_C(269.3649), SIMDE_FLOAT64_C(-10.2500) }, + { SIMDE_FLOAT64_C(500.7283), SIMDE_FLOAT64_C(4.0000) }, + { SIMDE_FLOAT64_C(-366.7700), SIMDE_FLOAT64_C(-2.5000) }, + { SIMDE_FLOAT64_C(183921.4769), SIMDE_FLOAT64_C(-0.2500) } }, + { { SIMDE_FLOAT64_C(143.7781), SIMDE_FLOAT64_C(100.0000) }, + { SIMDE_FLOAT64_C(-820.1470), SIMDE_FLOAT64_C(-8.0000) }, + { SIMDE_FLOAT64_C(508.8355), SIMDE_FLOAT64_C(-0.5000) }, + { SIMDE_FLOAT64_C(417463.7229), SIMDE_FLOAT64_C(96.0000) } }, + { { SIMDE_FLOAT64_C(607.4635), SIMDE_FLOAT64_C(-7.7500) }, + { SIMDE_FLOAT64_C(-554.7208), SIMDE_FLOAT64_C(-16.0000) }, + { SIMDE_FLOAT64_C(-837.7806), SIMDE_FLOAT64_C(4.0000) }, + { SIMDE_FLOAT64_C(-464126.8948), SIMDE_FLOAT64_C(56.2500) } }, + { { SIMDE_FLOAT64_C(-820.7109), SIMDE_FLOAT64_C(0.1250) }, + { SIMDE_FLOAT64_C(-738.0983), SIMDE_FLOAT64_C(32.0000) }, + { SIMDE_FLOAT64_C(-463.0757), SIMDE_FLOAT64_C(0.2500) }, + { SIMDE_FLOAT64_C(-342616.1067), SIMDE_FLOAT64_C(-7.8750) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_float64x2_t c = simde_vld1q_f64(test_vec[i].c); + simde_float64x2_t r = simde_vfmsq_f64(a, b, c); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsh_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/fms_lane.c b/test/arm/neon/fms_lane.c new file mode 100644 index 000000000..576e63cbb --- /dev/null +++ b/test/arm/neon/fms_lane.c @@ -0,0 +1,1213 @@ +#define SIMDE_TEST_ARM_NEON_INSN fms_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/fms_lane.h" + +static int +test_simde_vfmsd_lane_f64 
(SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a; + simde_float64_t b; + simde_float64_t v[1]; + simde_float64_t r; + } test_vec[] = { + { SIMDE_FLOAT64_C(5083.45820), + SIMDE_FLOAT64_C(-3347.28901), + { SIMDE_FLOAT64_C(-9966.81290) }, + SIMDE_FLOAT64_C(-33356719.80975) }, + { SIMDE_FLOAT64_C(3981.27307), + SIMDE_FLOAT64_C(2968.35655), + { SIMDE_FLOAT64_C(-2931.01175) }, + SIMDE_FLOAT64_C(8704269.19618) }, + { SIMDE_FLOAT64_C(-4088.29943), + SIMDE_FLOAT64_C(-3956.96859), + { SIMDE_FLOAT64_C(-2729.82603) }, + SIMDE_FLOAT64_C(-10805924.15163) }, + { SIMDE_FLOAT64_C(-2775.38215), + SIMDE_FLOAT64_C(6069.57489), + { SIMDE_FLOAT64_C(9040.55990) }, + SIMDE_FLOAT64_C(-54875130.73359) }, + { SIMDE_FLOAT64_C(9278.86644), + SIMDE_FLOAT64_C(-2329.30846), + { SIMDE_FLOAT64_C(6279.55264) }, + SIMDE_FLOAT64_C(14636293.96772) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64_t r = simde_vfmsd_lane_f64(test_vec[i].a, test_vec[i].b, simde_vld1_f64(test_vec[i].v), 0); + simde_assert_equal_f64(r, test_vec[i].r, 1); + } + + return 0; +} + +static int +test_simde_vfmsd_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a; + simde_float64_t b; + simde_float64_t v[2]; + simde_float64_t r0; + simde_float64_t r1; + } test_vec[] = { + { SIMDE_FLOAT64_C(-3108.00660), + SIMDE_FLOAT64_C(-5521.68941), + { SIMDE_FLOAT64_C(1818.16262), SIMDE_FLOAT64_C(-9300.44809) }, + SIMDE_FLOAT64_C(10036221.29886), + SIMDE_FLOAT64_C(-51357293.73881) }, + { SIMDE_FLOAT64_C(8415.12967), + SIMDE_FLOAT64_C(1111.46983), + { SIMDE_FLOAT64_C(-1295.73632), SIMDE_FLOAT64_C(-4438.18975) }, + SIMDE_FLOAT64_C(1448586.96000), + SIMDE_FLOAT64_C(4941329.15670) }, + { SIMDE_FLOAT64_C(5884.59336), + SIMDE_FLOAT64_C(-1992.49187), + { SIMDE_FLOAT64_C(2407.80020), SIMDE_FLOAT64_C(2554.90830) }, + SIMDE_FLOAT64_C(4803406.90538), + SIMDE_FLOAT64_C(5096518.59537) }, + { SIMDE_FLOAT64_C(-2673.73366), + SIMDE_FLOAT64_C(-5819.36367), + { 
SIMDE_FLOAT64_C(5499.70467), SIMDE_FLOAT64_C(2027.19160) }, + SIMDE_FLOAT64_C(32002107.81655), + SIMDE_FLOAT64_C(11794291.42416) }, + { SIMDE_FLOAT64_C(-2883.16010), + SIMDE_FLOAT64_C(7115.86616), + { SIMDE_FLOAT64_C(5926.97189), SIMDE_FLOAT64_C(-6790.25503) }, + SIMDE_FLOAT64_C(-42178421.85403), + SIMDE_FLOAT64_C(48315662.79867) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t v = simde_vld1q_f64(test_vec[i].v); + simde_float64_t r0 = simde_vfmsd_laneq_f64(test_vec[i].a, test_vec[i].b, v, 0); + simde_float64_t r1 = simde_vfmsd_laneq_f64(test_vec[i].a, test_vec[i].b, v, 1); + simde_assert_equal_f64(r0, test_vec[i].r0, 1); + simde_assert_equal_f64(r1, test_vec[i].r1, 1); + } + + return 0; +} + +static int +test_simde_vfmsh_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t b; + simde_float16_t v[4]; + simde_float16_t r0; + simde_float16_t r1; + simde_float16_t r2; + simde_float16_t r3; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(-5.6), + SIMDE_FLOAT16_VALUE(9.8), + { SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(8.9), SIMDE_FLOAT16_VALUE(-9.0), SIMDE_FLOAT16_VALUE(4.0) }, + SIMDE_FLOAT16_VALUE(-15.40), + SIMDE_FLOAT16_VALUE(-92.82), + SIMDE_FLOAT16_VALUE(82.60), + SIMDE_FLOAT16_VALUE(-44.80) }, + { SIMDE_FLOAT16_VALUE(6.4), + SIMDE_FLOAT16_VALUE(0.3), + { SIMDE_FLOAT16_VALUE(5.7), SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(6.2) }, + SIMDE_FLOAT16_VALUE(4.69), + SIMDE_FLOAT16_VALUE(3.85), + SIMDE_FLOAT16_VALUE(8.47), + SIMDE_FLOAT16_VALUE(4.54) }, + { SIMDE_FLOAT16_VALUE(4.6), + SIMDE_FLOAT16_VALUE(7.9), + { SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(-1.4) }, + SIMDE_FLOAT16_VALUE(-18.31), + SIMDE_FLOAT16_VALUE(50.42), + SIMDE_FLOAT16_VALUE(74.91), + SIMDE_FLOAT16_VALUE(15.66) }, + { SIMDE_FLOAT16_VALUE(3.5), + SIMDE_FLOAT16_VALUE(9.7), + { SIMDE_FLOAT16_VALUE(-8.3), SIMDE_FLOAT16_VALUE(2.6), 
SIMDE_FLOAT16_VALUE(0.1), SIMDE_FLOAT16_VALUE(3.3) }, + SIMDE_FLOAT16_VALUE(84.01), + SIMDE_FLOAT16_VALUE(-21.72), + SIMDE_FLOAT16_VALUE(2.53), + SIMDE_FLOAT16_VALUE(-28.51) }, + { SIMDE_FLOAT16_VALUE(-4.7), + SIMDE_FLOAT16_VALUE(-9.2), + { SIMDE_FLOAT16_VALUE(-0.4), SIMDE_FLOAT16_VALUE(-1.1), SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(0.3) }, + SIMDE_FLOAT16_VALUE(-8.38), + SIMDE_FLOAT16_VALUE(-14.82), + SIMDE_FLOAT16_VALUE(42.22), + SIMDE_FLOAT16_VALUE(-1.94) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t v = simde_vld1_f16(test_vec[i].v); + simde_float16_t r0 = simde_vfmsh_lane_f16(test_vec[i].a, test_vec[i].b, v, 0); + simde_float16_t r1 = simde_vfmsh_lane_f16(test_vec[i].a, test_vec[i].b, v, 1); + simde_float16_t r2 = simde_vfmsh_lane_f16(test_vec[i].a, test_vec[i].b, v, 2); + simde_float16_t r3 = simde_vfmsh_lane_f16(test_vec[i].a, test_vec[i].b, v, 3); + simde_assert_equal_f16(r0, test_vec[i].r0, 1); + simde_assert_equal_f16(r1, test_vec[i].r1, 1); + simde_assert_equal_f16(r2, test_vec[i].r2, 1); + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + } + + return 0; +} + +static int +test_simde_vfmsh_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t b; + simde_float16_t v[8]; + simde_float16_t r0; + simde_float16_t r1; + simde_float16_t r2; + simde_float16_t r3; + simde_float16_t r4; + simde_float16_t r5; + simde_float16_t r6; + simde_float16_t r7; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(-6.7), + SIMDE_FLOAT16_VALUE(-9.3), + { SIMDE_FLOAT16_VALUE(-1.6), SIMDE_FLOAT16_VALUE(-9.4), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-1.0), + SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(3.1), SIMDE_FLOAT16_VALUE(0.7), SIMDE_FLOAT16_VALUE(9.4) }, + SIMDE_FLOAT16_VALUE(-21.58), + SIMDE_FLOAT16_VALUE(-94.12), + SIMDE_FLOAT16_VALUE(-2.98), + SIMDE_FLOAT16_VALUE(-16.00), + SIMDE_FLOAT16_VALUE(6.32), + SIMDE_FLOAT16_VALUE(22.13), + SIMDE_FLOAT16_VALUE(-0.19), + 
SIMDE_FLOAT16_VALUE(80.72) }, + { SIMDE_FLOAT16_VALUE(5.4), + SIMDE_FLOAT16_VALUE(3.0), + { SIMDE_FLOAT16_VALUE(3.8), SIMDE_FLOAT16_VALUE(5.2), SIMDE_FLOAT16_VALUE(-7.0), SIMDE_FLOAT16_VALUE(6.8), + SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-6.3), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(6.8) }, + SIMDE_FLOAT16_VALUE(-6.00), + SIMDE_FLOAT16_VALUE(-10.20), + SIMDE_FLOAT16_VALUE(26.40), + SIMDE_FLOAT16_VALUE(-15.00), + SIMDE_FLOAT16_VALUE(21.90), + SIMDE_FLOAT16_VALUE(24.30), + SIMDE_FLOAT16_VALUE(-16.80), + SIMDE_FLOAT16_VALUE(-15.00) }, + { SIMDE_FLOAT16_VALUE(7.8), + SIMDE_FLOAT16_VALUE(0.3), + { SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(9.2), + SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-8.0), SIMDE_FLOAT16_VALUE(5.9), SIMDE_FLOAT16_VALUE(2.0) }, + SIMDE_FLOAT16_VALUE(8.25), + SIMDE_FLOAT16_VALUE(7.95), + SIMDE_FLOAT16_VALUE(7.05), + SIMDE_FLOAT16_VALUE(5.04), + SIMDE_FLOAT16_VALUE(7.23), + SIMDE_FLOAT16_VALUE(10.20), + SIMDE_FLOAT16_VALUE(6.03), + SIMDE_FLOAT16_VALUE(7.20) }, + { SIMDE_FLOAT16_VALUE(7.9), + SIMDE_FLOAT16_VALUE(3.0), + { SIMDE_FLOAT16_VALUE(-6.6), SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(9.6), + SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(-5.2), SIMDE_FLOAT16_VALUE(-8.5), SIMDE_FLOAT16_VALUE(-9.3) }, + SIMDE_FLOAT16_VALUE(27.70), + SIMDE_FLOAT16_VALUE(-17.60), + SIMDE_FLOAT16_VALUE(14.20), + SIMDE_FLOAT16_VALUE(-20.90), + SIMDE_FLOAT16_VALUE(-16.10), + SIMDE_FLOAT16_VALUE(23.50), + SIMDE_FLOAT16_VALUE(33.40), + SIMDE_FLOAT16_VALUE(35.80) }, + { SIMDE_FLOAT16_VALUE(8.3), + SIMDE_FLOAT16_VALUE(-0.2), + { SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(1.4), + SIMDE_FLOAT16_VALUE(-7.1), SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(-9.4) }, + SIMDE_FLOAT16_VALUE(8.62), + SIMDE_FLOAT16_VALUE(10.10), + SIMDE_FLOAT16_VALUE(10.04), + SIMDE_FLOAT16_VALUE(8.58), + 
SIMDE_FLOAT16_VALUE(6.88), + SIMDE_FLOAT16_VALUE(7.38), + SIMDE_FLOAT16_VALUE(6.38), + SIMDE_FLOAT16_VALUE(6.42) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t v = simde_vld1q_f16(test_vec[i].v); + simde_float16_t r0 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 0); + simde_float16_t r1 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 1); + simde_float16_t r2 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 2); + simde_float16_t r3 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 3); + simde_float16_t r4 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 4); + simde_float16_t r5 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 5); + simde_float16_t r6 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 6); + simde_float16_t r7 = simde_vfmsh_laneq_f16(test_vec[i].a, test_vec[i].b, v, 7); + + simde_assert_equal_f16(r0, test_vec[i].r0, 1); + simde_assert_equal_f16(r1, test_vec[i].r1, 1); + simde_assert_equal_f16(r2, test_vec[i].r2, 1); + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + simde_assert_equal_f16(r4, test_vec[i].r4, 1); + simde_assert_equal_f16(r5, test_vec[i].r5, 1); + simde_assert_equal_f16(r6, test_vec[i].r6, 1); + simde_assert_equal_f16(r7, test_vec[i].r7, 1); + } + + return 0; +} + +static int +test_simde_vfmss_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a; + simde_float32_t b; + simde_float32_t v[2]; + simde_float32_t r0; + simde_float32_t r1; + } test_vec[] = { + { SIMDE_FLOAT32_C(-245.78053), + SIMDE_FLOAT32_C(-510.31364), + { SIMDE_FLOAT32_C(-692.23531), SIMDE_FLOAT32_C(-878.41332) }, + SIMDE_FLOAT32_C(-353502.90372), + SIMDE_FLOAT32_C(-448512.08281) }, + { SIMDE_FLOAT32_C(-564.58993), + SIMDE_FLOAT32_C(-741.36855), + { SIMDE_FLOAT32_C(-36.47120), SIMDE_FLOAT32_C(56.49479) }, + SIMDE_FLOAT32_C(-27603.18998), + SIMDE_FLOAT32_C(41318.86775) }, + { SIMDE_FLOAT32_C(984.27762), + SIMDE_FLOAT32_C(-912.50208), 
+ { SIMDE_FLOAT32_C(-876.43217), SIMDE_FLOAT32_C(694.61740) }, + SIMDE_FLOAT32_C(-798761.90387), + SIMDE_FLOAT32_C(634824.10714) }, + { SIMDE_FLOAT32_C(-431.23028), + SIMDE_FLOAT32_C(-695.27617), + { SIMDE_FLOAT32_C(966.19776), SIMDE_FLOAT32_C(503.58925) }, + SIMDE_FLOAT32_C(671343.04625), + SIMDE_FLOAT32_C(349702.37079) }, + { SIMDE_FLOAT32_C(-292.62265), + SIMDE_FLOAT32_C(957.48912), + { SIMDE_FLOAT32_C(229.60490), SIMDE_FLOAT32_C(-673.93248) }, + SIMDE_FLOAT32_C(-220136.81448), + SIMDE_FLOAT32_C(644990.39976) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t v = simde_vld1_f32(test_vec[i].v); + simde_float32_t r0 = simde_vfmss_lane_f32(test_vec[i].a, test_vec[i].b, v, 0); + simde_float32_t r1 = simde_vfmss_lane_f32(test_vec[i].a, test_vec[i].b, v, 1); + simde_assert_equal_f32(r0, test_vec[i].r0, 1); + simde_assert_equal_f32(r1, test_vec[i].r1, 1); + } + + return 0; +} + +static int +test_simde_vfmss_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a; + simde_float32_t b; + simde_float32_t v[4]; + simde_float32_t r0; + simde_float32_t r1; + simde_float32_t r2; + simde_float32_t r3; + } test_vec[] = { + { SIMDE_FLOAT32_C(-543.69870), + SIMDE_FLOAT32_C(-921.77581), + { SIMDE_FLOAT32_C(-179.29631), SIMDE_FLOAT32_C(131.66650), SIMDE_FLOAT32_C(-896.40104), SIMDE_FLOAT32_C(-50.53601) }, + SIMDE_FLOAT32_C(-165814.70087), + SIMDE_FLOAT32_C(120823.29126), + SIMDE_FLOAT32_C(-826824.49470), + SIMDE_FLOAT32_C(-47126.56953) }, + { SIMDE_FLOAT32_C(630.92630), + SIMDE_FLOAT32_C(-294.49788), + { SIMDE_FLOAT32_C(542.77533), SIMDE_FLOAT32_C(-98.29697), SIMDE_FLOAT32_C(192.56576), SIMDE_FLOAT32_C(-510.86249) }, + SIMDE_FLOAT32_C(160477.11170), + SIMDE_FLOAT32_C(-28317.32299), + SIMDE_FLOAT32_C(57341.13617), + SIMDE_FLOAT32_C(-149816.99612) }, + { SIMDE_FLOAT32_C(335.92091), + SIMDE_FLOAT32_C(-781.41195), + { SIMDE_FLOAT32_C(-271.13463), SIMDE_FLOAT32_C(907.74971), SIMDE_FLOAT32_C(-327.73176), 
SIMDE_FLOAT32_C(-364.37127) }, + SIMDE_FLOAT32_C(-211531.91898), + SIMDE_FLOAT32_C(709662.39198), + SIMDE_FLOAT32_C(-255757.59162), + SIMDE_FLOAT32_C(-284388.14421) }, + { SIMDE_FLOAT32_C(-520.15321), + SIMDE_FLOAT32_C(-320.28896), + { SIMDE_FLOAT32_C(658.32259), SIMDE_FLOAT32_C(52.40700), SIMDE_FLOAT32_C(-107.97869), SIMDE_FLOAT32_C(153.98573) }, + SIMDE_FLOAT32_C(210333.30406), + SIMDE_FLOAT32_C(16265.22944), + SIMDE_FLOAT32_C(-35104.53670), + SIMDE_FLOAT32_C(48799.77512) }, + { SIMDE_FLOAT32_C(-504.12254), + SIMDE_FLOAT32_C(-451.10630), + { SIMDE_FLOAT32_C(428.44660), SIMDE_FLOAT32_C(598.14857), SIMDE_FLOAT32_C(475.06262), SIMDE_FLOAT32_C(-754.04418) }, + SIMDE_FLOAT32_C(192770.83751), + SIMDE_FLOAT32_C(269324.46435), + SIMDE_FLOAT32_C(213799.61931), + SIMDE_FLOAT32_C(-340658.20187) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t v = simde_vld1q_f32(test_vec[i].v); + simde_float32_t r0 = simde_vfmss_laneq_f32(test_vec[i].a, test_vec[i].b, v, 0); + simde_float32_t r1 = simde_vfmss_laneq_f32(test_vec[i].a, test_vec[i].b, v, 1); + simde_float32_t r2 = simde_vfmss_laneq_f32(test_vec[i].a, test_vec[i].b, v, 2); + simde_float32_t r3 = simde_vfmss_laneq_f32(test_vec[i].a, test_vec[i].b, v, 3); + + simde_assert_equal_f32(r0, test_vec[i].r0, 1); + simde_assert_equal_f32(r1, test_vec[i].r1, 1); + simde_assert_equal_f32(r2, test_vec[i].r2, 1); + simde_assert_equal_f32(r3, test_vec[i].r3, 1); + } + + return 0; +} + +static int +test_simde_vfms_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[4]; + simde_float16_t b[4]; + simde_float16_t v[4]; + simde_float16_t r0[4]; + simde_float16_t r1[4]; + simde_float16_t r2[4]; + simde_float16_t r3[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(-9.5), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(-5.8) }, + { SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(-9.3), SIMDE_FLOAT16_VALUE(7.4) }, + { 
SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(-4.2), SIMDE_FLOAT16_VALUE(-9.9), SIMDE_FLOAT16_VALUE(4.1) }, + { SIMDE_FLOAT16_VALUE(-2.95), SIMDE_FLOAT16_VALUE(-6.05), SIMDE_FLOAT16_VALUE(-20.29), SIMDE_FLOAT16_VALUE(11.22) }, + { SIMDE_FLOAT16_VALUE(-11.50), SIMDE_FLOAT16_VALUE(-3.20), SIMDE_FLOAT16_VALUE(-37.96), SIMDE_FLOAT16_VALUE(25.28) }, + { SIMDE_FLOAT16_VALUE(-37.15), SIMDE_FLOAT16_VALUE(5.35), SIMDE_FLOAT16_VALUE(-90.87), SIMDE_FLOAT16_VALUE(67.46) }, + { SIMDE_FLOAT16_VALUE(25.85), SIMDE_FLOAT16_VALUE(-15.65), SIMDE_FLOAT16_VALUE(39.23), SIMDE_FLOAT16_VALUE(-36.14) } }, + { { SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(-4.7) }, + { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-8.4), SIMDE_FLOAT16_VALUE(6.1) }, + { SIMDE_FLOAT16_VALUE(7.1), SIMDE_FLOAT16_VALUE(-3.9), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(-3.6) }, + { SIMDE_FLOAT16_VALUE(-11.01), SIMDE_FLOAT16_VALUE(48.49), SIMDE_FLOAT16_VALUE(59.94), SIMDE_FLOAT16_VALUE(-48.01) }, + { SIMDE_FLOAT16_VALUE(1.09), SIMDE_FLOAT16_VALUE(-27.41), SIMDE_FLOAT16_VALUE(-32.46), SIMDE_FLOAT16_VALUE(19.09) }, + { SIMDE_FLOAT16_VALUE(-5.95), SIMDE_FLOAT16_VALUE(16.75), SIMDE_FLOAT16_VALUE(21.30), SIMDE_FLOAT16_VALUE(-19.95) }, + { SIMDE_FLOAT16_VALUE(0.76), SIMDE_FLOAT16_VALUE(-25.34), SIMDE_FLOAT16_VALUE(-29.94), SIMDE_FLOAT16_VALUE(17.26) } }, + { { SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(-5.4), SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(3.9) }, + { SIMDE_FLOAT16_VALUE(-1.7), SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(-5.7), SIMDE_FLOAT16_VALUE(5.5) }, + { SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(-3.6) }, + { SIMDE_FLOAT16_VALUE(3.61), SIMDE_FLOAT16_VALUE(-21.06), SIMDE_FLOAT16_VALUE(-11.09), SIMDE_FLOAT16_VALUE(18.75) }, + { SIMDE_FLOAT16_VALUE(-2.85), SIMDE_FLOAT16_VALUE(-43.10), SIMDE_FLOAT16_VALUE(-32.75), SIMDE_FLOAT16_VALUE(39.65) }, + { 
SIMDE_FLOAT16_VALUE(4.97), SIMDE_FLOAT16_VALUE(-16.42), SIMDE_FLOAT16_VALUE(-6.53), SIMDE_FLOAT16_VALUE(14.35) }, + { SIMDE_FLOAT16_VALUE(2.08), SIMDE_FLOAT16_VALUE(-26.28), SIMDE_FLOAT16_VALUE(-16.22), SIMDE_FLOAT16_VALUE(23.70) } }, + { { SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(-9.8), SIMDE_FLOAT16_VALUE(3.0), SIMDE_FLOAT16_VALUE(9.4) }, + { SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(0.8) }, + { SIMDE_FLOAT16_VALUE(-1.4), SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(5.7) }, + { SIMDE_FLOAT16_VALUE(5.74), SIMDE_FLOAT16_VALUE(-3.78), SIMDE_FLOAT16_VALUE(15.60), SIMDE_FLOAT16_VALUE(10.52) }, + { SIMDE_FLOAT16_VALUE(-7.46), SIMDE_FLOAT16_VALUE(19.87), SIMDE_FLOAT16_VALUE(65.10), SIMDE_FLOAT16_VALUE(14.92) }, + { SIMDE_FLOAT16_VALUE(-2.42), SIMDE_FLOAT16_VALUE(10.84), SIMDE_FLOAT16_VALUE(46.20), SIMDE_FLOAT16_VALUE(13.24) }, + { SIMDE_FLOAT16_VALUE(22.78), SIMDE_FLOAT16_VALUE(-34.31), SIMDE_FLOAT16_VALUE(-48.30), SIMDE_FLOAT16_VALUE(4.84) } }, + { { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(9.4), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-8.5) }, + { SIMDE_FLOAT16_VALUE(6.1), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(-5.8) }, + { SIMDE_FLOAT16_VALUE(6.9), SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(-9.2) }, + { SIMDE_FLOAT16_VALUE(-37.19), SIMDE_FLOAT16_VALUE(-25.10), SIMDE_FLOAT16_VALUE(-40.17), SIMDE_FLOAT16_VALUE(31.52) }, + { SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(9.40), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(-8.50) }, + { SIMDE_FLOAT16_VALUE(-34.75), SIMDE_FLOAT16_VALUE(-23.10), SIMDE_FLOAT16_VALUE(-37.65), SIMDE_FLOAT16_VALUE(29.20) }, + { SIMDE_FLOAT16_VALUE(61.02), SIMDE_FLOAT16_VALUE(55.40), SIMDE_FLOAT16_VALUE(61.26), SIMDE_FLOAT16_VALUE(-61.86) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t v = simde_vld1_f16(test_vec[i].v); + 
simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r0 = simde_vfms_lane_f16(a, b, v, 0); + simde_float16x4_t r1 = simde_vfms_lane_f16(a, b, v, 1); + simde_float16x4_t r2 = simde_vfms_lane_f16(a, b, v, 2); + simde_float16x4_t r3 = simde_vfms_lane_f16(a, b, v, 3); + simde_test_arm_neon_assert_equal_f16x4(r0, simde_vld1_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x4(r1, simde_vld1_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x4(r2, simde_vld1_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x4(r3, simde_vld1_f16(test_vec[i].r3), 1); + } + + return 0; +} + +static int +test_simde_vfms_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[2]; + simde_float32_t b[2]; + simde_float32_t v[2]; + simde_float32_t r0[2]; + simde_float32_t r1[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-122.86190), SIMDE_FLOAT32_C(876.09460) }, + { SIMDE_FLOAT32_C(-659.82491), SIMDE_FLOAT32_C(-582.51925) }, + { SIMDE_FLOAT32_C(192.61616), SIMDE_FLOAT32_C(-268.86836) }, + { SIMDE_FLOAT32_C(126970.07668), SIMDE_FLOAT32_C(113078.71313) }, + { SIMDE_FLOAT32_C(-177528.90336), SIMDE_FLOAT32_C(-155744.89961) } }, + { { SIMDE_FLOAT32_C(94.67060), SIMDE_FLOAT32_C(-520.91137) }, + { SIMDE_FLOAT32_C(-947.08934), SIMDE_FLOAT32_C(-694.08895) }, + { SIMDE_FLOAT32_C(178.67959), SIMDE_FLOAT32_C(-186.53816) }, + { SIMDE_FLOAT32_C(169320.20891), SIMDE_FLOAT32_C(123498.61992) }, + { SIMDE_FLOAT32_C(-176573.63370), SIMDE_FLOAT32_C(-129994.98787) } }, + { { SIMDE_FLOAT32_C(155.07091), SIMDE_FLOAT32_C(-342.36202) }, + { SIMDE_FLOAT32_C(-783.56502), SIMDE_FLOAT32_C(-177.49802) }, + { SIMDE_FLOAT32_C(-750.10136), SIMDE_FLOAT32_C(-316.79896) }, + { SIMDE_FLOAT32_C(-587598.11686), SIMDE_FLOAT32_C(-133483.87184) }, + { SIMDE_FLOAT32_C(-248077.51452), SIMDE_FLOAT32_C(-56573.55208) } }, + { { SIMDE_FLOAT32_C(57.11387), SIMDE_FLOAT32_C(575.00959) }, + { 
SIMDE_FLOAT32_C(-166.49087), SIMDE_FLOAT32_C(-456.70562) }, + { SIMDE_FLOAT32_C(334.78387), SIMDE_FLOAT32_C(-93.37312) }, + { SIMDE_FLOAT32_C(55795.57118), SIMDE_FLOAT32_C(153472.68523) }, + { SIMDE_FLOAT32_C(-15488.65812), SIMDE_FLOAT32_C(-42069.01964) } }, + { { SIMDE_FLOAT32_C(-884.10215), SIMDE_FLOAT32_C(465.54633) }, + { SIMDE_FLOAT32_C(-317.69692), SIMDE_FLOAT32_C(801.18637) }, + { SIMDE_FLOAT32_C(-930.09100), SIMDE_FLOAT32_C(-642.43853) }, + { SIMDE_FLOAT32_C(-296371.15224), SIMDE_FLOAT32_C(745641.77909) }, + { SIMDE_FLOAT32_C(-204984.84670), SIMDE_FLOAT32_C(515178.53928) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t v = simde_vld1_f32(test_vec[i].v); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r0 = simde_vfms_lane_f32(a, b, v, 0); + simde_float32x2_t r1 = simde_vfms_lane_f32(a, b, v, 1); + simde_test_arm_neon_assert_equal_f32x2(r0, simde_vld1_f32(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f32x2(r1, simde_vld1_f32(test_vec[i].r1), 1); + } + + return 0; +} + +static int +test_simde_vfms_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[1]; + simde_float64_t b[1]; + simde_float64_t v[1]; + simde_float64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(3636.83577) }, + { SIMDE_FLOAT64_C(6175.08860) }, + { SIMDE_FLOAT64_C(7239.25714) }, + { SIMDE_FLOAT64_C(-44699417.39457) } }, + { { SIMDE_FLOAT64_C(-895.90190) }, + { SIMDE_FLOAT64_C(-6127.51825) }, + { SIMDE_FLOAT64_C(-6063.19315) }, + { SIMDE_FLOAT64_C(-37153222.57897) } }, + { { SIMDE_FLOAT64_C(7511.87882) }, + { SIMDE_FLOAT64_C(-3060.31431) }, + { SIMDE_FLOAT64_C(-8366.52902) }, + { SIMDE_FLOAT64_C(-25596696.63421) } }, + { { SIMDE_FLOAT64_C(-9672.14195) }, + { SIMDE_FLOAT64_C(-4610.28204) }, + { SIMDE_FLOAT64_C(-9678.81520) }, + { SIMDE_FLOAT64_C(-44631740.08271) } }, + { { SIMDE_FLOAT64_C(6971.33141) }, + { 
SIMDE_FLOAT64_C(7885.70448) }, + { SIMDE_FLOAT64_C(-4134.40167) }, + { SIMDE_FLOAT64_C(32609641.06601) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t v = simde_vld1_f64(test_vec[i].v); + simde_float64x1_t r = simde_vfms_lane_f64(simde_vld1_f64(test_vec[i].a), simde_vld1_f64(test_vec[i].b), v, 0); + simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfms_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[4]; + simde_float16_t b[4]; + simde_float16_t v[8]; + simde_float16_t r0[4]; + simde_float16_t r1[4]; + simde_float16_t r2[4]; + simde_float16_t r3[4]; + simde_float16_t r4[4]; + simde_float16_t r5[4]; + simde_float16_t r6[4]; + simde_float16_t r7[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(-9.5) }, + { SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(8.3) }, + { SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-5.8), + SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-4.4) }, + { SIMDE_FLOAT16_VALUE(-41.81), SIMDE_FLOAT16_VALUE(0.70), SIMDE_FLOAT16_VALUE(-74.72), SIMDE_FLOAT16_VALUE(-85.03) }, + { SIMDE_FLOAT16_VALUE(8.21), SIMDE_FLOAT16_VALUE(12.90), SIMDE_FLOAT16_VALUE(37.52), SIMDE_FLOAT16_VALUE(16.23) }, + { SIMDE_FLOAT16_VALUE(3.70), SIMDE_FLOAT16_VALUE(11.80), SIMDE_FLOAT16_VALUE(27.40), SIMDE_FLOAT16_VALUE(7.10) }, + { SIMDE_FLOAT16_VALUE(19.28), SIMDE_FLOAT16_VALUE(15.60), SIMDE_FLOAT16_VALUE(62.36), SIMDE_FLOAT16_VALUE(38.64) }, + { SIMDE_FLOAT16_VALUE(30.76), SIMDE_FLOAT16_VALUE(18.40), SIMDE_FLOAT16_VALUE(88.12), SIMDE_FLOAT16_VALUE(61.88) }, + { SIMDE_FLOAT16_VALUE(-14.34), SIMDE_FLOAT16_VALUE(7.40), SIMDE_FLOAT16_VALUE(-13.08), SIMDE_FLOAT16_VALUE(-29.42) }, + { 
SIMDE_FLOAT16_VALUE(1.65), SIMDE_FLOAT16_VALUE(11.30), SIMDE_FLOAT16_VALUE(22.80), SIMDE_FLOAT16_VALUE(2.95) }, + { SIMDE_FLOAT16_VALUE(13.54), SIMDE_FLOAT16_VALUE(14.20), SIMDE_FLOAT16_VALUE(49.48), SIMDE_FLOAT16_VALUE(27.02) } }, + { { SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(1.7), SIMDE_FLOAT16_VALUE(-6.9) }, + { SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(8.4), SIMDE_FLOAT16_VALUE(6.0), SIMDE_FLOAT16_VALUE(3.7) }, + { SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(-5.1), SIMDE_FLOAT16_VALUE(-4.0), SIMDE_FLOAT16_VALUE(5.4), + SIMDE_FLOAT16_VALUE(-7.1), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(7.7), SIMDE_FLOAT16_VALUE(0.2) }, + { SIMDE_FLOAT16_VALUE(34.58), SIMDE_FLOAT16_VALUE(-35.62), SIMDE_FLOAT16_VALUE(-24.10), SIMDE_FLOAT16_VALUE(-22.81) }, + { SIMDE_FLOAT16_VALUE(-46.26), SIMDE_FLOAT16_VALUE(43.34), SIMDE_FLOAT16_VALUE(32.30), SIMDE_FLOAT16_VALUE(11.97) }, + { SIMDE_FLOAT16_VALUE(-36.80), SIMDE_FLOAT16_VALUE(34.10), SIMDE_FLOAT16_VALUE(25.70), SIMDE_FLOAT16_VALUE(7.90) }, + { SIMDE_FLOAT16_VALUE(44.04), SIMDE_FLOAT16_VALUE(-44.86), SIMDE_FLOAT16_VALUE(-30.70), SIMDE_FLOAT16_VALUE(-26.88) }, + { SIMDE_FLOAT16_VALUE(-63.46), SIMDE_FLOAT16_VALUE(60.14), SIMDE_FLOAT16_VALUE(44.30), SIMDE_FLOAT16_VALUE(19.37) }, + { SIMDE_FLOAT16_VALUE(68.98), SIMDE_FLOAT16_VALUE(-69.22), SIMDE_FLOAT16_VALUE(-48.10), SIMDE_FLOAT16_VALUE(-37.61) }, + { SIMDE_FLOAT16_VALUE(63.82), SIMDE_FLOAT16_VALUE(-64.18), SIMDE_FLOAT16_VALUE(-44.50), SIMDE_FLOAT16_VALUE(-35.39) }, + { SIMDE_FLOAT16_VALUE(-0.68), SIMDE_FLOAT16_VALUE(-1.18), SIMDE_FLOAT16_VALUE(0.50), SIMDE_FLOAT16_VALUE(-7.64) } }, + { { SIMDE_FLOAT16_VALUE(-9.9), SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-7.5) }, + { SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(-7.5), SIMDE_FLOAT16_VALUE(-7.2), SIMDE_FLOAT16_VALUE(9.0) }, + { SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(8.4), + SIMDE_FLOAT16_VALUE(8.3), 
SIMDE_FLOAT16_VALUE(-6.0), SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(4.8) }, + { SIMDE_FLOAT16_VALUE(-50.49), SIMDE_FLOAT16_VALUE(28.45), SIMDE_FLOAT16_VALUE(30.82), SIMDE_FLOAT16_VALUE(-44.40) }, + { SIMDE_FLOAT16_VALUE(-99.00), SIMDE_FLOAT16_VALUE(65.20), SIMDE_FLOAT16_VALUE(66.10), SIMDE_FLOAT16_VALUE(-88.50) }, + { SIMDE_FLOAT16_VALUE(-63.36), SIMDE_FLOAT16_VALUE(38.20), SIMDE_FLOAT16_VALUE(40.18), SIMDE_FLOAT16_VALUE(-56.10) }, + { SIMDE_FLOAT16_VALUE(-93.06), SIMDE_FLOAT16_VALUE(60.70), SIMDE_FLOAT16_VALUE(61.78), SIMDE_FLOAT16_VALUE(-83.10) }, + { SIMDE_FLOAT16_VALUE(-92.07), SIMDE_FLOAT16_VALUE(59.95), SIMDE_FLOAT16_VALUE(61.06), SIMDE_FLOAT16_VALUE(-82.20) }, + { SIMDE_FLOAT16_VALUE(49.50), SIMDE_FLOAT16_VALUE(-47.30), SIMDE_FLOAT16_VALUE(-41.90), SIMDE_FLOAT16_VALUE(46.50) }, + { SIMDE_FLOAT16_VALUE(-2.97), SIMDE_FLOAT16_VALUE(-7.55), SIMDE_FLOAT16_VALUE(-3.74), SIMDE_FLOAT16_VALUE(-1.20) }, + { SIMDE_FLOAT16_VALUE(-57.42), SIMDE_FLOAT16_VALUE(33.70), SIMDE_FLOAT16_VALUE(35.86), SIMDE_FLOAT16_VALUE(-50.70) } }, + { { SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(6.6), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(0.9) }, + { SIMDE_FLOAT16_VALUE(4.2), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(-7.6) }, + { SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(2.8), SIMDE_FLOAT16_VALUE(7.2), + SIMDE_FLOAT16_VALUE(6.0), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(9.4), SIMDE_FLOAT16_VALUE(0.6) }, + { SIMDE_FLOAT16_VALUE(22.96), SIMDE_FLOAT16_VALUE(10.89), SIMDE_FLOAT16_VALUE(-24.08), SIMDE_FLOAT16_VALUE(-24.18) }, + { SIMDE_FLOAT16_VALUE(13.30), SIMDE_FLOAT16_VALUE(7.90), SIMDE_FLOAT16_VALUE(-13.50), SIMDE_FLOAT16_VALUE(-6.70) }, + { SIMDE_FLOAT16_VALUE(-2.66), SIMDE_FLOAT16_VALUE(2.96), SIMDE_FLOAT16_VALUE(3.98), SIMDE_FLOAT16_VALUE(22.18) }, + { SIMDE_FLOAT16_VALUE(-21.14), SIMDE_FLOAT16_VALUE(-2.76), SIMDE_FLOAT16_VALUE(24.22), SIMDE_FLOAT16_VALUE(55.62) }, + { SIMDE_FLOAT16_VALUE(-16.10), 
SIMDE_FLOAT16_VALUE(-1.20), SIMDE_FLOAT16_VALUE(18.70), SIMDE_FLOAT16_VALUE(46.50) }, + { SIMDE_FLOAT16_VALUE(23.38), SIMDE_FLOAT16_VALUE(11.02), SIMDE_FLOAT16_VALUE(-24.54), SIMDE_FLOAT16_VALUE(-24.94) }, + { SIMDE_FLOAT16_VALUE(-30.38), SIMDE_FLOAT16_VALUE(-5.62), SIMDE_FLOAT16_VALUE(34.34), SIMDE_FLOAT16_VALUE(72.34) }, + { SIMDE_FLOAT16_VALUE(6.58), SIMDE_FLOAT16_VALUE(5.82), SIMDE_FLOAT16_VALUE(-6.14), SIMDE_FLOAT16_VALUE(5.46) } }, + { { SIMDE_FLOAT16_VALUE(-9.4), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(-4.3) }, + { SIMDE_FLOAT16_VALUE(7.7), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(-6.0), SIMDE_FLOAT16_VALUE(-2.5) }, + { SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(2.1), SIMDE_FLOAT16_VALUE(-8.7), + SIMDE_FLOAT16_VALUE(-1.4), SIMDE_FLOAT16_VALUE(3.1), SIMDE_FLOAT16_VALUE(2.1), SIMDE_FLOAT16_VALUE(1.4) }, + { SIMDE_FLOAT16_VALUE(-51.75), SIMDE_FLOAT16_VALUE(-7.55), SIMDE_FLOAT16_VALUE(40.80), SIMDE_FLOAT16_VALUE(9.45) }, + { SIMDE_FLOAT16_VALUE(-5.55), SIMDE_FLOAT16_VALUE(7.45), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(-5.55) }, + { SIMDE_FLOAT16_VALUE(-25.57), SIMDE_FLOAT16_VALUE(0.95), SIMDE_FLOAT16_VALUE(20.40), SIMDE_FLOAT16_VALUE(0.95) }, + { SIMDE_FLOAT16_VALUE(57.59), SIMDE_FLOAT16_VALUE(27.95), SIMDE_FLOAT16_VALUE(-44.40), SIMDE_FLOAT16_VALUE(-26.05) }, + { SIMDE_FLOAT16_VALUE(1.38), SIMDE_FLOAT16_VALUE(9.70), SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(-7.80) }, + { SIMDE_FLOAT16_VALUE(-33.27), SIMDE_FLOAT16_VALUE(-1.55), SIMDE_FLOAT16_VALUE(26.40), SIMDE_FLOAT16_VALUE(3.45) }, + { SIMDE_FLOAT16_VALUE(-25.57), SIMDE_FLOAT16_VALUE(0.95), SIMDE_FLOAT16_VALUE(20.40), SIMDE_FLOAT16_VALUE(0.95) }, + { SIMDE_FLOAT16_VALUE(-20.18), SIMDE_FLOAT16_VALUE(2.70), SIMDE_FLOAT16_VALUE(16.20), SIMDE_FLOAT16_VALUE(-0.80) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t v = simde_vld1q_f16(test_vec[i].v); + simde_float16x4_t a = 
simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r0 = simde_vfms_laneq_f16(a, b, v, 0); + simde_float16x4_t r1 = simde_vfms_laneq_f16(a, b, v, 1); + simde_float16x4_t r2 = simde_vfms_laneq_f16(a, b, v, 2); + simde_float16x4_t r3 = simde_vfms_laneq_f16(a, b, v, 3); + simde_float16x4_t r4 = simde_vfms_laneq_f16(a, b, v, 4); + simde_float16x4_t r5 = simde_vfms_laneq_f16(a, b, v, 5); + simde_float16x4_t r6 = simde_vfms_laneq_f16(a, b, v, 6); + simde_float16x4_t r7 = simde_vfms_laneq_f16(a, b, v, 7); + simde_test_arm_neon_assert_equal_f16x4(r0, simde_vld1_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x4(r1, simde_vld1_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x4(r2, simde_vld1_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x4(r3, simde_vld1_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x4(r4, simde_vld1_f16(test_vec[i].r4), 1); + simde_test_arm_neon_assert_equal_f16x4(r5, simde_vld1_f16(test_vec[i].r5), 1); + simde_test_arm_neon_assert_equal_f16x4(r6, simde_vld1_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x4(r7, simde_vld1_f16(test_vec[i].r7), 1); + } + + return 0; +} + +static int +test_simde_vfms_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[2]; + simde_float32_t b[2]; + simde_float32_t v[4]; + simde_float32_t r0[2]; + simde_float32_t r1[2]; + simde_float32_t r2[2]; + simde_float32_t r3[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-153.43837), SIMDE_FLOAT32_C(503.74412) }, + { SIMDE_FLOAT32_C(-559.77714), SIMDE_FLOAT32_C(-959.16982) }, + { SIMDE_FLOAT32_C(-830.50913), SIMDE_FLOAT32_C(-66.51184), SIMDE_FLOAT32_C(453.22695), SIMDE_FLOAT32_C(-501.35099) }, + { SIMDE_FLOAT32_C(-465053.45990), SIMDE_FLOAT32_C(-796095.54394) }, + { SIMDE_FLOAT32_C(-37385.24475), SIMDE_FLOAT32_C(-63292.40360) }, + { SIMDE_FLOAT32_C(253552.64714), SIMDE_FLOAT32_C(435225.35681) }, + { SIMDE_FLOAT32_C(-280798.25826), 
SIMDE_FLOAT32_C(-480376.99017) } }, + { { SIMDE_FLOAT32_C(199.60276), SIMDE_FLOAT32_C(-541.95639) }, + { SIMDE_FLOAT32_C(218.73526), SIMDE_FLOAT32_C(109.17016) }, + { SIMDE_FLOAT32_C(-726.91525), SIMDE_FLOAT32_C(-486.28244), SIMDE_FLOAT32_C(284.07620), SIMDE_FLOAT32_C(379.83470) }, + { SIMDE_FLOAT32_C(159201.59736), SIMDE_FLOAT32_C(78815.49873) }, + { SIMDE_FLOAT32_C(106566.71823), SIMDE_FLOAT32_C(52545.57634) }, + { SIMDE_FLOAT32_C(-61937.87821), SIMDE_FLOAT32_C(-31554.60104) }, + { SIMDE_FLOAT32_C(-82883.63698), SIMDE_FLOAT32_C(-42008.57124) } }, + { { SIMDE_FLOAT32_C(-184.18353), SIMDE_FLOAT32_C(-43.69214) }, + { SIMDE_FLOAT32_C(-418.67591), SIMDE_FLOAT32_C(856.13203) }, + { SIMDE_FLOAT32_C(-754.73863), SIMDE_FLOAT32_C(638.29933), SIMDE_FLOAT32_C(927.38787), SIMDE_FLOAT32_C(669.07491) }, + { SIMDE_FLOAT32_C(-316175.06656), SIMDE_FLOAT32_C(646112.22444) }, + { SIMDE_FLOAT32_C(267056.36877), SIMDE_FLOAT32_C(-546512.19264) }, + { SIMDE_FLOAT32_C(388090.77539), SIMDE_FLOAT32_C(-794010.14955) }, + { SIMDE_FLOAT32_C(279941.36267), SIMDE_FLOAT32_C(-572860.15232) } }, + { { SIMDE_FLOAT32_C(389.52113), SIMDE_FLOAT32_C(707.84734) }, + { SIMDE_FLOAT32_C(958.05355), SIMDE_FLOAT32_C(150.29900) }, + { SIMDE_FLOAT32_C(120.28485), SIMDE_FLOAT32_C(11.61154), SIMDE_FLOAT32_C(-460.14664), SIMDE_FLOAT32_C(-838.47235) }, + { SIMDE_FLOAT32_C(-114849.80274), SIMDE_FLOAT32_C(-17370.84519) }, + { SIMDE_FLOAT32_C(-10734.95656), SIMDE_FLOAT32_C(-1037.35565) }, + { SIMDE_FLOAT32_C(441234.63871), SIMDE_FLOAT32_C(69867.42814) }, + { SIMDE_FLOAT32_C(803690.93246), SIMDE_FLOAT32_C(126729.40605) } }, + { { SIMDE_FLOAT32_C(-826.73092), SIMDE_FLOAT32_C(254.89718) }, + { SIMDE_FLOAT32_C(390.80161), SIMDE_FLOAT32_C(-199.96408) }, + { SIMDE_FLOAT32_C(-699.09639), SIMDE_FLOAT32_C(-542.77986), SIMDE_FLOAT32_C(-881.29501), SIMDE_FLOAT32_C(-872.41284) }, + { SIMDE_FLOAT32_C(272381.26620), SIMDE_FLOAT32_C(-139539.26674) }, + { SIMDE_FLOAT32_C(211292.51312), SIMDE_FLOAT32_C(-108281.57571) }, + { 
SIMDE_FLOAT32_C(343584.78183), SIMDE_FLOAT32_C(-175972.44601) }, + { SIMDE_FLOAT32_C(340113.61461), SIMDE_FLOAT32_C(-174196.33065) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t v = simde_vld1q_f32(test_vec[i].v); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r0 = simde_vfms_laneq_f32(a, b, v, 0); + simde_float32x2_t r1 = simde_vfms_laneq_f32(a, b, v, 1); + simde_float32x2_t r2 = simde_vfms_laneq_f32(a, b, v, 2); + simde_float32x2_t r3 = simde_vfms_laneq_f32(a, b, v, 3); + simde_test_arm_neon_assert_equal_f32x2(r0, simde_vld1_f32(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f32x2(r1, simde_vld1_f32(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f32x2(r2, simde_vld1_f32(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f32x2(r3, simde_vld1_f32(test_vec[i].r3), 1); + } + + return 0; +} + +static int +test_simde_vfms_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[1]; + simde_float64_t b[1]; + simde_float64_t v[2]; + simde_float64_t r0[1]; + simde_float64_t r1[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-3942.86616) }, + { SIMDE_FLOAT64_C(-9855.76058) }, + { SIMDE_FLOAT64_C(-1324.65337), SIMDE_FLOAT64_C(822.32748) }, + { SIMDE_FLOAT64_C(-13059409.32468) }, + { SIMDE_FLOAT64_C(8100719.88905) } }, + { { SIMDE_FLOAT64_C(-6732.42544) }, + { SIMDE_FLOAT64_C(2071.21955) }, + { SIMDE_FLOAT64_C(4414.34149), SIMDE_FLOAT64_C(-8374.81694) }, + { SIMDE_FLOAT64_C(-9149802.79669) }, + { SIMDE_FLOAT64_C(17339352.10646) } }, + { { SIMDE_FLOAT64_C(-1559.08339) }, + { SIMDE_FLOAT64_C(1412.37257) }, + { SIMDE_FLOAT64_C(-4430.92431), SIMDE_FLOAT64_C(3299.93935) }, + { SIMDE_FLOAT64_C(6256556.86291) }, + { SIMDE_FLOAT64_C(-4662302.89156) } }, + { { SIMDE_FLOAT64_C(2969.86908) }, + { SIMDE_FLOAT64_C(-5042.58214) }, + { SIMDE_FLOAT64_C(120.53521), SIMDE_FLOAT64_C(-1587.80482) }, + { 
SIMDE_FLOAT64_C(610778.54441) }, + { SIMDE_FLOAT64_C(-8003666.36062) } }, + { { SIMDE_FLOAT64_C(8478.26755) }, + { SIMDE_FLOAT64_C(1529.21578) }, + { SIMDE_FLOAT64_C(5553.07356), SIMDE_FLOAT64_C(5831.98402) }, + { SIMDE_FLOAT64_C(-8483369.43716) }, + { SIMDE_FLOAT64_C(-8909883.70655) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t v = simde_vld1q_f64(test_vec[i].v); + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + simde_float64x1_t r0 = simde_vfms_laneq_f64(a, b, v, 0); + simde_float64x1_t r1 = simde_vfms_laneq_f64(a, b, v, 1); + simde_test_arm_neon_assert_equal_f64x1(r0, simde_vld1_f64(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f64x1(r1, simde_vld1_f64(test_vec[i].r1), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[8]; + simde_float16_t b[8]; + simde_float16_t v[4]; + simde_float16_t r0[8]; + simde_float16_t r1[8]; + simde_float16_t r2[8]; + simde_float16_t r3[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-1.9), + SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(9.4), SIMDE_FLOAT16_VALUE(-7.9), SIMDE_FLOAT16_VALUE(9.0) }, + { SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(-9.0), SIMDE_FLOAT16_VALUE(-2.9), + SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-6.1) }, + { SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(8.9), SIMDE_FLOAT16_VALUE(-8.5) }, + { SIMDE_FLOAT16_VALUE(-44.00), SIMDE_FLOAT16_VALUE(3.18), SIMDE_FLOAT16_VALUE(50.70), SIMDE_FLOAT16_VALUE(16.66), + SIMDE_FLOAT16_VALUE(20.22), SIMDE_FLOAT16_VALUE(-31.56), SIMDE_FLOAT16_VALUE(15.14), SIMDE_FLOAT16_VALUE(48.04) }, + { SIMDE_FLOAT16_VALUE(-61.60), SIMDE_FLOAT16_VALUE(2.52), SIMDE_FLOAT16_VALUE(70.50), 
SIMDE_FLOAT16_VALUE(23.04), + SIMDE_FLOAT16_VALUE(25.28), SIMDE_FLOAT16_VALUE(-45.64), SIMDE_FLOAT16_VALUE(23.06), SIMDE_FLOAT16_VALUE(61.46) }, + { SIMDE_FLOAT16_VALUE(-64.00), SIMDE_FLOAT16_VALUE(2.43), SIMDE_FLOAT16_VALUE(73.20), SIMDE_FLOAT16_VALUE(23.91), + SIMDE_FLOAT16_VALUE(25.97), SIMDE_FLOAT16_VALUE(-47.56), SIMDE_FLOAT16_VALUE(24.14), SIMDE_FLOAT16_VALUE(63.29) }, + { SIMDE_FLOAT16_VALUE(75.20), SIMDE_FLOAT16_VALUE(7.65), SIMDE_FLOAT16_VALUE(-83.40), SIMDE_FLOAT16_VALUE(-26.55), + SIMDE_FLOAT16_VALUE(-14.05), SIMDE_FLOAT16_VALUE(63.80), SIMDE_FLOAT16_VALUE(-38.50), SIMDE_FLOAT16_VALUE(-42.85) } }, + { { SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-5.1), SIMDE_FLOAT16_VALUE(-4.9), + SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(-9.3), SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(-1.1) }, + { SIMDE_FLOAT16_VALUE(6.7), SIMDE_FLOAT16_VALUE(-8.0), SIMDE_FLOAT16_VALUE(-9.4), SIMDE_FLOAT16_VALUE(8.3), + SIMDE_FLOAT16_VALUE(-9.0), SIMDE_FLOAT16_VALUE(-7.0), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-0.1) }, + { SIMDE_FLOAT16_VALUE(-2.5), SIMDE_FLOAT16_VALUE(-7.0), SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(7.7) }, + { SIMDE_FLOAT16_VALUE(13.15), SIMDE_FLOAT16_VALUE(-21.50), SIMDE_FLOAT16_VALUE(-28.60), SIMDE_FLOAT16_VALUE(15.85), + SIMDE_FLOAT16_VALUE(-32.10), SIMDE_FLOAT16_VALUE(-26.80), SIMDE_FLOAT16_VALUE(-1.30), SIMDE_FLOAT16_VALUE(-1.35) }, + { SIMDE_FLOAT16_VALUE(43.30), SIMDE_FLOAT16_VALUE(-57.50), SIMDE_FLOAT16_VALUE(-70.90), SIMDE_FLOAT16_VALUE(53.20), + SIMDE_FLOAT16_VALUE(-72.60), SIMDE_FLOAT16_VALUE(-58.30), SIMDE_FLOAT16_VALUE(0.50), SIMDE_FLOAT16_VALUE(-1.80) }, + { SIMDE_FLOAT16_VALUE(-19.68), SIMDE_FLOAT16_VALUE(17.70), SIMDE_FLOAT16_VALUE(17.46), SIMDE_FLOAT16_VALUE(-24.82), + SIMDE_FLOAT16_VALUE(12.00), SIMDE_FLOAT16_VALUE(7.50), SIMDE_FLOAT16_VALUE(-3.26), SIMDE_FLOAT16_VALUE(-0.86) }, + { SIMDE_FLOAT16_VALUE(-55.19), SIMDE_FLOAT16_VALUE(60.10), SIMDE_FLOAT16_VALUE(67.28), SIMDE_FLOAT16_VALUE(-68.81), + 
SIMDE_FLOAT16_VALUE(59.70), SIMDE_FLOAT16_VALUE(44.60), SIMDE_FLOAT16_VALUE(-5.38), SIMDE_FLOAT16_VALUE(-0.33) } }, + { { SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(-6.8), SIMDE_FLOAT16_VALUE(-0.6), + SIMDE_FLOAT16_VALUE(-2.6), SIMDE_FLOAT16_VALUE(-7.1), SIMDE_FLOAT16_VALUE(-8.5), SIMDE_FLOAT16_VALUE(2.5) }, + { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(-0.6), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(6.4), + SIMDE_FLOAT16_VALUE(6.9), SIMDE_FLOAT16_VALUE(-8.7), SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(3.9) }, + { SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(0.1), SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(-7.4) }, + { SIMDE_FLOAT16_VALUE(-46.82), SIMDE_FLOAT16_VALUE(1.98), SIMDE_FLOAT16_VALUE(-26.30), SIMDE_FLOAT16_VALUE(-50.52), + SIMDE_FLOAT16_VALUE(-56.42), SIMDE_FLOAT16_VALUE(60.76), SIMDE_FLOAT16_VALUE(27.38), SIMDE_FLOAT16_VALUE(-27.92) }, + { SIMDE_FLOAT16_VALUE(-9.09), SIMDE_FLOAT16_VALUE(-2.64), SIMDE_FLOAT16_VALUE(-7.05), SIMDE_FLOAT16_VALUE(-1.24), + SIMDE_FLOAT16_VALUE(-3.29), SIMDE_FLOAT16_VALUE(-6.23), SIMDE_FLOAT16_VALUE(-8.04), SIMDE_FLOAT16_VALUE(2.11) }, + { SIMDE_FLOAT16_VALUE(-53.68), SIMDE_FLOAT16_VALUE(2.82), SIMDE_FLOAT16_VALUE(-29.80), SIMDE_FLOAT16_VALUE(-59.48), + SIMDE_FLOAT16_VALUE(-66.08), SIMDE_FLOAT16_VALUE(72.94), SIMDE_FLOAT16_VALUE(33.82), SIMDE_FLOAT16_VALUE(-33.38) }, + { SIMDE_FLOAT16_VALUE(27.66), SIMDE_FLOAT16_VALUE(-7.14), SIMDE_FLOAT16_VALUE(11.70), SIMDE_FLOAT16_VALUE(46.76), + SIMDE_FLOAT16_VALUE(48.46), SIMDE_FLOAT16_VALUE(-71.48), SIMDE_FLOAT16_VALUE(-42.54), SIMDE_FLOAT16_VALUE(31.36) } }, + { { SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(3.7), + SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(-7.9) }, + { SIMDE_FLOAT16_VALUE(-0.1), SIMDE_FLOAT16_VALUE(-2.9), SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(-5.0), + SIMDE_FLOAT16_VALUE(0.7), SIMDE_FLOAT16_VALUE(1.1), 
SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(7.2) }, + { SIMDE_FLOAT16_VALUE(-1.1), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-4.3) }, + { SIMDE_FLOAT16_VALUE(8.99), SIMDE_FLOAT16_VALUE(-2.69), SIMDE_FLOAT16_VALUE(-12.12), SIMDE_FLOAT16_VALUE(-1.80), + SIMDE_FLOAT16_VALUE(3.07), SIMDE_FLOAT16_VALUE(8.41), SIMDE_FLOAT16_VALUE(6.40), SIMDE_FLOAT16_VALUE(0.02) }, + { SIMDE_FLOAT16_VALUE(9.18), SIMDE_FLOAT16_VALUE(2.82), SIMDE_FLOAT16_VALUE(-6.04), SIMDE_FLOAT16_VALUE(7.70), + SIMDE_FLOAT16_VALUE(1.74), SIMDE_FLOAT16_VALUE(6.32), SIMDE_FLOAT16_VALUE(4.50), SIMDE_FLOAT16_VALUE(-13.66) }, + { SIMDE_FLOAT16_VALUE(8.74), SIMDE_FLOAT16_VALUE(-9.94), SIMDE_FLOAT16_VALUE(-20.12), SIMDE_FLOAT16_VALUE(-14.30), + SIMDE_FLOAT16_VALUE(4.82), SIMDE_FLOAT16_VALUE(11.16), SIMDE_FLOAT16_VALUE(8.90), SIMDE_FLOAT16_VALUE(18.02) }, + { SIMDE_FLOAT16_VALUE(8.67), SIMDE_FLOAT16_VALUE(-11.97), SIMDE_FLOAT16_VALUE(-22.36), SIMDE_FLOAT16_VALUE(-17.80), + SIMDE_FLOAT16_VALUE(5.31), SIMDE_FLOAT16_VALUE(11.93), SIMDE_FLOAT16_VALUE(9.60), SIMDE_FLOAT16_VALUE(23.06) } }, + { { SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(7.6), SIMDE_FLOAT16_VALUE(0.9), SIMDE_FLOAT16_VALUE(4.6), + SIMDE_FLOAT16_VALUE(-9.5), SIMDE_FLOAT16_VALUE(-4.1), SIMDE_FLOAT16_VALUE(-1.3), SIMDE_FLOAT16_VALUE(3.7) }, + { SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-0.2), + SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(4.2), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(8.1) }, + { SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(4.5), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(-7.2) }, + { SIMDE_FLOAT16_VALUE(-30.40), SIMDE_FLOAT16_VALUE(-6.92), SIMDE_FLOAT16_VALUE(-8.67), SIMDE_FLOAT16_VALUE(5.26), + SIMDE_FLOAT16_VALUE(6.34), SIMDE_FLOAT16_VALUE(-17.96), SIMDE_FLOAT16_VALUE(16.19), SIMDE_FLOAT16_VALUE(-23.03) }, + { SIMDE_FLOAT16_VALUE(-42.40), SIMDE_FLOAT16_VALUE(-12.20), SIMDE_FLOAT16_VALUE(-12.15), SIMDE_FLOAT16_VALUE(5.50), + 
SIMDE_FLOAT16_VALUE(12.10), SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(22.55), SIMDE_FLOAT16_VALUE(-32.75) }, + { SIMDE_FLOAT16_VALUE(29.60), SIMDE_FLOAT16_VALUE(19.48), SIMDE_FLOAT16_VALUE(8.73), SIMDE_FLOAT16_VALUE(4.06), + SIMDE_FLOAT16_VALUE(-22.46), SIMDE_FLOAT16_VALUE(7.24), SIMDE_FLOAT16_VALUE(-15.61), SIMDE_FLOAT16_VALUE(25.57) }, + { SIMDE_FLOAT16_VALUE(74.60), SIMDE_FLOAT16_VALUE(39.28), SIMDE_FLOAT16_VALUE(21.78), SIMDE_FLOAT16_VALUE(3.16), + SIMDE_FLOAT16_VALUE(-44.06), SIMDE_FLOAT16_VALUE(26.14), SIMDE_FLOAT16_VALUE(-39.46), SIMDE_FLOAT16_VALUE(62.02) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t v = simde_vld1_f16(test_vec[i].v); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r0 = simde_vfmsq_lane_f16(a, b, v, 0); + simde_float16x8_t r1 = simde_vfmsq_lane_f16(a, b, v, 1); + simde_float16x8_t r2 = simde_vfmsq_lane_f16(a, b, v, 2); + simde_float16x8_t r3 = simde_vfmsq_lane_f16(a, b, v, 3); + simde_test_arm_neon_assert_equal_f16x8(r0, simde_vld1q_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x8(r1, simde_vld1q_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x8(r2, simde_vld1q_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x8(r3, simde_vld1q_f16(test_vec[i].r3), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[4]; + simde_float32_t b[4]; + simde_float32_t v[2]; + simde_float32_t r0[4]; + simde_float32_t r1[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(259.18173), SIMDE_FLOAT32_C(-982.71656), SIMDE_FLOAT32_C(201.04464), SIMDE_FLOAT32_C(-839.59556) }, + { SIMDE_FLOAT32_C(-687.10042), SIMDE_FLOAT32_C(-248.92711), SIMDE_FLOAT32_C(499.97356), SIMDE_FLOAT32_C(96.80549) }, + { SIMDE_FLOAT32_C(-737.85009), SIMDE_FLOAT32_C(806.59386) }, + { SIMDE_FLOAT32_C(-506717.92770), 
SIMDE_FLOAT32_C(-184653.60658), SIMDE_FLOAT32_C(369106.58433), SIMDE_FLOAT32_C(70588.34750) }, + { SIMDE_FLOAT32_C(554470.16094), SIMDE_FLOAT32_C(199800.36006), SIMDE_FLOAT32_C(-403074.56009), SIMDE_FLOAT32_C(-78922.31276) } }, + { { SIMDE_FLOAT32_C(-776.04346), SIMDE_FLOAT32_C(312.16529), SIMDE_FLOAT32_C(-469.69252), SIMDE_FLOAT32_C(-755.31740) }, + { SIMDE_FLOAT32_C(-350.36425), SIMDE_FLOAT32_C(20.89342), SIMDE_FLOAT32_C(289.86478), SIMDE_FLOAT32_C(421.10273) }, + { SIMDE_FLOAT32_C(718.18823), SIMDE_FLOAT32_C(-40.43155) }, + { SIMDE_FLOAT32_C(250851.43555), SIMDE_FLOAT32_C(-14693.24359), SIMDE_FLOAT32_C(-208647.17036), SIMDE_FLOAT32_C(-303186.34346) }, + { SIMDE_FLOAT32_C(-14941.81202), SIMDE_FLOAT32_C(1156.91861), SIMDE_FLOAT32_C(11249.98922), SIMDE_FLOAT32_C(16270.51753) } }, + { { SIMDE_FLOAT32_C(-17.44808), SIMDE_FLOAT32_C(993.72564), SIMDE_FLOAT32_C(-520.55389), SIMDE_FLOAT32_C(-398.75283) }, + { SIMDE_FLOAT32_C(822.00589), SIMDE_FLOAT32_C(-197.83084), SIMDE_FLOAT32_C(498.70719), SIMDE_FLOAT32_C(280.02367) }, + { SIMDE_FLOAT32_C(476.70933), SIMDE_FLOAT32_C(-342.78566) }, + { SIMDE_FLOAT32_C(-391875.32565), SIMDE_FLOAT32_C(95301.53498), SIMDE_FLOAT32_C(-238258.92417), SIMDE_FLOAT32_C(-133888.64830) }, + { SIMDE_FLOAT32_C(281754.38103), SIMDE_FLOAT32_C(-66819.85030), SIMDE_FLOAT32_C(170429.11759), SIMDE_FLOAT32_C(95589.34431) } }, + { { SIMDE_FLOAT32_C(-984.28741), SIMDE_FLOAT32_C(709.24134), SIMDE_FLOAT32_C(-294.78043), SIMDE_FLOAT32_C(863.91755) }, + { SIMDE_FLOAT32_C(341.83247), SIMDE_FLOAT32_C(-465.47732), SIMDE_FLOAT32_C(-676.53834), SIMDE_FLOAT32_C(565.41989) }, + { SIMDE_FLOAT32_C(-776.90809), SIMDE_FLOAT32_C(925.68351) }, + { SIMDE_FLOAT32_C(264588.12256), SIMDE_FLOAT32_C(-360923.85159), SIMDE_FLOAT32_C(-525902.89051), SIMDE_FLOAT32_C(440143.20642) }, + { SIMDE_FLOAT32_C(-317412.96628), SIMDE_FLOAT32_C(431593.91738), SIMDE_FLOAT32_C(625965.60519), SIMDE_FLOAT32_C(-522535.95314) } }, + { { SIMDE_FLOAT32_C(-234.32520), SIMDE_FLOAT32_C(-246.62651), 
SIMDE_FLOAT32_C(673.43551), SIMDE_FLOAT32_C(11.13586) }, + { SIMDE_FLOAT32_C(732.20479), SIMDE_FLOAT32_C(99.78277), SIMDE_FLOAT32_C(-699.98007), SIMDE_FLOAT32_C(903.21751) }, + { SIMDE_FLOAT32_C(-579.16182), SIMDE_FLOAT32_C(-231.83252) }, + { SIMDE_FLOAT32_C(423830.73321), SIMDE_FLOAT32_C(57543.74672), SIMDE_FLOAT32_C(-404728.29923), SIMDE_FLOAT32_C(523120.23775) }, + { SIMDE_FLOAT32_C(169514.55746), SIMDE_FLOAT32_C(22886.26570), SIMDE_FLOAT32_C(-161604.71058), SIMDE_FLOAT32_C(209406.33076) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t v = simde_vld1_f32(test_vec[i].v); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r0 = simde_vfmsq_lane_f32(a, b, v, 0); + simde_float32x4_t r1 = simde_vfmsq_lane_f32(a, b, v, 1); + simde_test_arm_neon_assert_equal_f32x4(r0, simde_vld1q_f32(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f32x4(r1, simde_vld1q_f32(test_vec[i].r1), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[2]; + simde_float64_t b[2]; + simde_float64_t v[1]; + simde_float64_t r0[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(821.28925), SIMDE_FLOAT64_C(-6342.69479) }, + { SIMDE_FLOAT64_C(9166.38887), SIMDE_FLOAT64_C(-5790.51734) }, + { SIMDE_FLOAT64_C(-8262.79545) }, + { SIMDE_FLOAT64_C(75740817.56570), SIMDE_FLOAT64_C(-47852203.03573) } }, + { { SIMDE_FLOAT64_C(-5785.12562), SIMDE_FLOAT64_C(4386.72108) }, + { SIMDE_FLOAT64_C(-687.24970), SIMDE_FLOAT64_C(5209.94217) }, + { SIMDE_FLOAT64_C(3279.55066) }, + { SIMDE_FLOAT64_C(2248085.06744), SIMDE_FLOAT64_C(-17081882.53329) } }, + { { SIMDE_FLOAT64_C(8966.81503), SIMDE_FLOAT64_C(-850.87370) }, + { SIMDE_FLOAT64_C(-6534.17860), SIMDE_FLOAT64_C(8038.77118) }, + { SIMDE_FLOAT64_C(9244.64824) }, + { SIMDE_FLOAT64_C(60415149.50670), SIMDE_FLOAT64_C(-74316462.72382) } }, + { { 
SIMDE_FLOAT64_C(-5290.76223), SIMDE_FLOAT64_C(4570.18419) }, + { SIMDE_FLOAT64_C(-7944.64530), SIMDE_FLOAT64_C(-541.83675) }, + { SIMDE_FLOAT64_C(1903.77031) }, + { SIMDE_FLOAT64_C(15119489.05151), SIMDE_FLOAT64_C(1036102.89044) } }, + { { SIMDE_FLOAT64_C(1856.12192), SIMDE_FLOAT64_C(1962.01446) }, + { SIMDE_FLOAT64_C(-7579.59161), SIMDE_FLOAT64_C(-5421.01685) }, + { SIMDE_FLOAT64_C(-209.85312) }, + { SIMDE_FLOAT64_C(-1588744.85381), SIMDE_FLOAT64_C(-1135655.30540) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t v = simde_vld1_f64(test_vec[i].v); + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_float64x2_t r0 = simde_vfmsq_lane_f64(a, b, v, 0); + simde_test_arm_neon_assert_equal_f64x2(r0, simde_vld1q_f64(test_vec[i].r0), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[8]; + simde_float16_t b[8]; + simde_float16_t v[8]; + simde_float16_t r0[8]; + simde_float16_t r1[8]; + simde_float16_t r2[8]; + simde_float16_t r3[8]; + simde_float16_t r4[8]; + simde_float16_t r5[8]; + simde_float16_t r6[8]; + simde_float16_t r7[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(7.1), + SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(-0.6), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(9.3) }, + { SIMDE_FLOAT16_VALUE(-9.7), SIMDE_FLOAT16_VALUE(9.6), SIMDE_FLOAT16_VALUE(-9.9), SIMDE_FLOAT16_VALUE(0.0), + SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-4.9), SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(7.7) }, + { SIMDE_FLOAT16_VALUE(-1.3), SIMDE_FLOAT16_VALUE(0.7), SIMDE_FLOAT16_VALUE(-9.7), SIMDE_FLOAT16_VALUE(9.8), + SIMDE_FLOAT16_VALUE(7.7), SIMDE_FLOAT16_VALUE(7.1), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-9.2) }, + { SIMDE_FLOAT16_VALUE(-15.91), SIMDE_FLOAT16_VALUE(14.78), 
SIMDE_FLOAT16_VALUE(-10.27), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(9.19), SIMDE_FLOAT16_VALUE(-6.97), SIMDE_FLOAT16_VALUE(1.42), SIMDE_FLOAT16_VALUE(19.31) }, + { SIMDE_FLOAT16_VALUE(3.49), SIMDE_FLOAT16_VALUE(-4.42), SIMDE_FLOAT16_VALUE(9.53), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(2.59), SIMDE_FLOAT16_VALUE(2.83), SIMDE_FLOAT16_VALUE(10.62), SIMDE_FLOAT16_VALUE(3.91) }, + { SIMDE_FLOAT16_VALUE(-97.39), SIMDE_FLOAT16_VALUE(95.42), SIMDE_FLOAT16_VALUE(-93.43), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(36.91), SIMDE_FLOAT16_VALUE(-48.13), SIMDE_FLOAT16_VALUE(-37.22), SIMDE_FLOAT16_VALUE(83.99) }, + { SIMDE_FLOAT16_VALUE(91.76), SIMDE_FLOAT16_VALUE(-91.78), SIMDE_FLOAT16_VALUE(99.62), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(-27.44), SIMDE_FLOAT16_VALUE(47.42), SIMDE_FLOAT16_VALUE(52.48), SIMDE_FLOAT16_VALUE(-66.16) }, + { SIMDE_FLOAT16_VALUE(71.39), SIMDE_FLOAT16_VALUE(-71.62), SIMDE_FLOAT16_VALUE(78.83), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(-20.51), SIMDE_FLOAT16_VALUE(37.13), SIMDE_FLOAT16_VALUE(42.82), SIMDE_FLOAT16_VALUE(-49.99) }, + { SIMDE_FLOAT16_VALUE(65.57), SIMDE_FLOAT16_VALUE(-65.86), SIMDE_FLOAT16_VALUE(72.89), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(-18.53), SIMDE_FLOAT16_VALUE(34.19), SIMDE_FLOAT16_VALUE(40.06), SIMDE_FLOAT16_VALUE(-45.37) }, + { SIMDE_FLOAT16_VALUE(10.28), SIMDE_FLOAT16_VALUE(-11.14), SIMDE_FLOAT16_VALUE(16.46), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(0.28), SIMDE_FLOAT16_VALUE(6.26), SIMDE_FLOAT16_VALUE(13.84), SIMDE_FLOAT16_VALUE(-1.48) }, + { SIMDE_FLOAT16_VALUE(-92.54), SIMDE_FLOAT16_VALUE(90.62), SIMDE_FLOAT16_VALUE(-88.48), SIMDE_FLOAT16_VALUE(7.10), + SIMDE_FLOAT16_VALUE(35.26), SIMDE_FLOAT16_VALUE(-45.68), SIMDE_FLOAT16_VALUE(-34.92), SIMDE_FLOAT16_VALUE(80.14) } }, + { { SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-7.1), SIMDE_FLOAT16_VALUE(-7.2), SIMDE_FLOAT16_VALUE(-7.1), + SIMDE_FLOAT16_VALUE(-8.4), SIMDE_FLOAT16_VALUE(-0.1), SIMDE_FLOAT16_VALUE(-2.9), 
SIMDE_FLOAT16_VALUE(-1.5) }, + { SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-0.3), SIMDE_FLOAT16_VALUE(-3.0), + SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(-8.8) }, + { SIMDE_FLOAT16_VALUE(2.7), SIMDE_FLOAT16_VALUE(-7.8), SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(-0.8), + SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(6.0), SIMDE_FLOAT16_VALUE(-8.7), SIMDE_FLOAT16_VALUE(4.7) }, + { SIMDE_FLOAT16_VALUE(-3.08), SIMDE_FLOAT16_VALUE(-16.01), SIMDE_FLOAT16_VALUE(-6.39), SIMDE_FLOAT16_VALUE(1.00), + SIMDE_FLOAT16_VALUE(-5.70), SIMDE_FLOAT16_VALUE(-2.26), SIMDE_FLOAT16_VALUE(4.39), SIMDE_FLOAT16_VALUE(22.26) }, + { SIMDE_FLOAT16_VALUE(1.12), SIMDE_FLOAT16_VALUE(18.64), SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-30.50), + SIMDE_FLOAT16_VALUE(-16.20), SIMDE_FLOAT16_VALUE(6.14), SIMDE_FLOAT16_VALUE(-23.96), SIMDE_FLOAT16_VALUE(-70.14) }, + { SIMDE_FLOAT16_VALUE(-3.04), SIMDE_FLOAT16_VALUE(-15.68), SIMDE_FLOAT16_VALUE(-6.42), SIMDE_FLOAT16_VALUE(0.70), + SIMDE_FLOAT16_VALUE(-5.80), SIMDE_FLOAT16_VALUE(-2.18), SIMDE_FLOAT16_VALUE(4.12), SIMDE_FLOAT16_VALUE(21.38) }, + { SIMDE_FLOAT16_VALUE(-1.68), SIMDE_FLOAT16_VALUE(-4.46), SIMDE_FLOAT16_VALUE(-7.44), SIMDE_FLOAT16_VALUE(-9.50), + SIMDE_FLOAT16_VALUE(-9.20), SIMDE_FLOAT16_VALUE(0.54), SIMDE_FLOAT16_VALUE(-5.06), SIMDE_FLOAT16_VALUE(-8.54) }, + { SIMDE_FLOAT16_VALUE(-2.52), SIMDE_FLOAT16_VALUE(-11.39), SIMDE_FLOAT16_VALUE(-6.81), SIMDE_FLOAT16_VALUE(-3.20), + SIMDE_FLOAT16_VALUE(-7.10), SIMDE_FLOAT16_VALUE(-1.14), SIMDE_FLOAT16_VALUE(0.61), SIMDE_FLOAT16_VALUE(9.94) }, + { SIMDE_FLOAT16_VALUE(-4.40), SIMDE_FLOAT16_VALUE(-26.90), SIMDE_FLOAT16_VALUE(-5.40), SIMDE_FLOAT16_VALUE(10.90), + SIMDE_FLOAT16_VALUE(-2.40), SIMDE_FLOAT16_VALUE(-4.90), SIMDE_FLOAT16_VALUE(13.30), SIMDE_FLOAT16_VALUE(51.30) }, + { SIMDE_FLOAT16_VALUE(1.48), SIMDE_FLOAT16_VALUE(21.61), SIMDE_FLOAT16_VALUE(-9.81), SIMDE_FLOAT16_VALUE(-33.20), + 
SIMDE_FLOAT16_VALUE(-17.10), SIMDE_FLOAT16_VALUE(6.86), SIMDE_FLOAT16_VALUE(-26.39), SIMDE_FLOAT16_VALUE(-78.06) }, + { SIMDE_FLOAT16_VALUE(-3.88), SIMDE_FLOAT16_VALUE(-22.61), SIMDE_FLOAT16_VALUE(-5.79), SIMDE_FLOAT16_VALUE(7.00), + SIMDE_FLOAT16_VALUE(-3.70), SIMDE_FLOAT16_VALUE(-3.86), SIMDE_FLOAT16_VALUE(9.79), SIMDE_FLOAT16_VALUE(39.86) } }, + { { SIMDE_FLOAT16_VALUE(-5.1), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(2.8), SIMDE_FLOAT16_VALUE(6.7), + SIMDE_FLOAT16_VALUE(4.0), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(-2.8), SIMDE_FLOAT16_VALUE(5.0) }, + { SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(2.2), + SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(3.7), SIMDE_FLOAT16_VALUE(2.8), SIMDE_FLOAT16_VALUE(-4.0) }, + { SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(3.6), SIMDE_FLOAT16_VALUE(0.7), + SIMDE_FLOAT16_VALUE(7.3), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(1.7) }, + { SIMDE_FLOAT16_VALUE(-1.98), SIMDE_FLOAT16_VALUE(-23.28), SIMDE_FLOAT16_VALUE(14.32), SIMDE_FLOAT16_VALUE(11.98), + SIMDE_FLOAT16_VALUE(-6.80), SIMDE_FLOAT16_VALUE(5.78), SIMDE_FLOAT16_VALUE(3.92), SIMDE_FLOAT16_VALUE(-4.60) }, + { SIMDE_FLOAT16_VALUE(-2.63), SIMDE_FLOAT16_VALUE(-19.43), SIMDE_FLOAT16_VALUE(11.92), SIMDE_FLOAT16_VALUE(10.88), + SIMDE_FLOAT16_VALUE(-4.55), SIMDE_FLOAT16_VALUE(3.93), SIMDE_FLOAT16_VALUE(2.52), SIMDE_FLOAT16_VALUE(-2.60) }, + { SIMDE_FLOAT16_VALUE(-9.78), SIMDE_FLOAT16_VALUE(22.92), SIMDE_FLOAT16_VALUE(-14.48), SIMDE_FLOAT16_VALUE(-1.22), + SIMDE_FLOAT16_VALUE(20.20), SIMDE_FLOAT16_VALUE(-16.42), SIMDE_FLOAT16_VALUE(-12.88), SIMDE_FLOAT16_VALUE(19.40) }, + { SIMDE_FLOAT16_VALUE(-6.01), SIMDE_FLOAT16_VALUE(0.59), SIMDE_FLOAT16_VALUE(-0.56), SIMDE_FLOAT16_VALUE(5.16), + SIMDE_FLOAT16_VALUE(7.15), SIMDE_FLOAT16_VALUE(-5.69), SIMDE_FLOAT16_VALUE(-4.76), SIMDE_FLOAT16_VALUE(7.80) }, + { SIMDE_FLOAT16_VALUE(-14.59), SIMDE_FLOAT16_VALUE(51.41), 
SIMDE_FLOAT16_VALUE(-32.24), SIMDE_FLOAT16_VALUE(-9.36), + SIMDE_FLOAT16_VALUE(36.85), SIMDE_FLOAT16_VALUE(-30.11), SIMDE_FLOAT16_VALUE(-23.24), SIMDE_FLOAT16_VALUE(34.20) }, + { SIMDE_FLOAT16_VALUE(-13.42), SIMDE_FLOAT16_VALUE(44.48), SIMDE_FLOAT16_VALUE(-27.92), SIMDE_FLOAT16_VALUE(-7.38), + SIMDE_FLOAT16_VALUE(32.80), SIMDE_FLOAT16_VALUE(-26.78), SIMDE_FLOAT16_VALUE(-20.72), SIMDE_FLOAT16_VALUE(30.60) }, + { SIMDE_FLOAT16_VALUE(-10.43), SIMDE_FLOAT16_VALUE(26.77), SIMDE_FLOAT16_VALUE(-16.88), SIMDE_FLOAT16_VALUE(-2.32), + SIMDE_FLOAT16_VALUE(22.45), SIMDE_FLOAT16_VALUE(-18.27), SIMDE_FLOAT16_VALUE(-14.28), SIMDE_FLOAT16_VALUE(21.40) }, + { SIMDE_FLOAT16_VALUE(-7.31), SIMDE_FLOAT16_VALUE(8.29), SIMDE_FLOAT16_VALUE(-5.36), SIMDE_FLOAT16_VALUE(2.96), + SIMDE_FLOAT16_VALUE(11.65), SIMDE_FLOAT16_VALUE(-9.39), SIMDE_FLOAT16_VALUE(-7.56), SIMDE_FLOAT16_VALUE(11.80) } }, + { { SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(3.5), SIMDE_FLOAT16_VALUE(-2.1), + SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(3.5) }, + { SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(-8.7), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(1.4), + SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(9.4), SIMDE_FLOAT16_VALUE(-8.2) }, + { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-1.4), + SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(6.1), SIMDE_FLOAT16_VALUE(3.4), SIMDE_FLOAT16_VALUE(0.1) }, + { SIMDE_FLOAT16_VALUE(-29.35), SIMDE_FLOAT16_VALUE(47.43), SIMDE_FLOAT16_VALUE(-12.18), SIMDE_FLOAT16_VALUE(-8.96), + SIMDE_FLOAT16_VALUE(43.03), SIMDE_FLOAT16_VALUE(-14.99), SIMDE_FLOAT16_VALUE(-41.06), SIMDE_FLOAT16_VALUE(43.68) }, + { SIMDE_FLOAT16_VALUE(39.95), SIMDE_FLOAT16_VALUE(-62.19), SIMDE_FLOAT16_VALUE(28.14), SIMDE_FLOAT16_VALUE(8.68), + SIMDE_FLOAT16_VALUE(-53.99), SIMDE_FLOAT16_VALUE(36.67), SIMDE_FLOAT16_VALUE(77.38), 
SIMDE_FLOAT16_VALUE(-59.64) }, + { SIMDE_FLOAT16_VALUE(5.85), SIMDE_FLOAT16_VALUE(-8.25), SIMDE_FLOAT16_VALUE(8.30), SIMDE_FLOAT16_VALUE(-0.00), + SIMDE_FLOAT16_VALUE(-6.25), SIMDE_FLOAT16_VALUE(11.25), SIMDE_FLOAT16_VALUE(19.10), SIMDE_FLOAT16_VALUE(-8.80) }, + { SIMDE_FLOAT16_VALUE(5.30), SIMDE_FLOAT16_VALUE(-7.38), SIMDE_FLOAT16_VALUE(7.98), SIMDE_FLOAT16_VALUE(-0.14), + SIMDE_FLOAT16_VALUE(-5.48), SIMDE_FLOAT16_VALUE(10.84), SIMDE_FLOAT16_VALUE(18.16), SIMDE_FLOAT16_VALUE(-7.98) }, + { SIMDE_FLOAT16_VALUE(-2.40), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(3.50), SIMDE_FLOAT16_VALUE(-2.10), + SIMDE_FLOAT16_VALUE(5.30), SIMDE_FLOAT16_VALUE(5.10), SIMDE_FLOAT16_VALUE(5.00), SIMDE_FLOAT16_VALUE(3.50) }, + { SIMDE_FLOAT16_VALUE(-35.95), SIMDE_FLOAT16_VALUE(57.87), SIMDE_FLOAT16_VALUE(-16.02), SIMDE_FLOAT16_VALUE(-10.64), + SIMDE_FLOAT16_VALUE(52.27), SIMDE_FLOAT16_VALUE(-19.91), SIMDE_FLOAT16_VALUE(-52.34), SIMDE_FLOAT16_VALUE(53.52) }, + { SIMDE_FLOAT16_VALUE(-21.10), SIMDE_FLOAT16_VALUE(34.38), SIMDE_FLOAT16_VALUE(-7.38), SIMDE_FLOAT16_VALUE(-6.86), + SIMDE_FLOAT16_VALUE(31.48), SIMDE_FLOAT16_VALUE(-8.84), SIMDE_FLOAT16_VALUE(-26.96), SIMDE_FLOAT16_VALUE(31.38) }, + { SIMDE_FLOAT16_VALUE(-2.95), SIMDE_FLOAT16_VALUE(5.67), SIMDE_FLOAT16_VALUE(3.18), SIMDE_FLOAT16_VALUE(-2.24), + SIMDE_FLOAT16_VALUE(6.07), SIMDE_FLOAT16_VALUE(4.69), SIMDE_FLOAT16_VALUE(4.06), SIMDE_FLOAT16_VALUE(4.32) } }, + { { SIMDE_FLOAT16_VALUE(-6.3), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(3.2), + SIMDE_FLOAT16_VALUE(-7.6), SIMDE_FLOAT16_VALUE(5.6), SIMDE_FLOAT16_VALUE(5.7), SIMDE_FLOAT16_VALUE(3.3) }, + { SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-6.4), SIMDE_FLOAT16_VALUE(-1.8), + SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(3.7), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(2.6) }, + { SIMDE_FLOAT16_VALUE(-3.5), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(-4.2), SIMDE_FLOAT16_VALUE(-5.3), + SIMDE_FLOAT16_VALUE(-0.7), 
SIMDE_FLOAT16_VALUE(-4.7), SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(6.6) }, + { SIMDE_FLOAT16_VALUE(23.80), SIMDE_FLOAT16_VALUE(18.75), SIMDE_FLOAT16_VALUE(-15.60), SIMDE_FLOAT16_VALUE(-3.10), + SIMDE_FLOAT16_VALUE(17.60), SIMDE_FLOAT16_VALUE(18.55), SIMDE_FLOAT16_VALUE(6.40), SIMDE_FLOAT16_VALUE(12.40) }, + { SIMDE_FLOAT16_VALUE(-64.78), SIMDE_FLOAT16_VALUE(-15.24), SIMDE_FLOAT16_VALUE(50.32), SIMDE_FLOAT16_VALUE(15.44), + SIMDE_FLOAT16_VALUE(-56.56), SIMDE_FLOAT16_VALUE(-19.56), SIMDE_FLOAT16_VALUE(4.34), SIMDE_FLOAT16_VALUE(-14.38) }, + { SIMDE_FLOAT16_VALUE(29.82), SIMDE_FLOAT16_VALUE(21.06), SIMDE_FLOAT16_VALUE(-20.08), SIMDE_FLOAT16_VALUE(-4.36), + SIMDE_FLOAT16_VALUE(22.64), SIMDE_FLOAT16_VALUE(21.14), SIMDE_FLOAT16_VALUE(6.54), SIMDE_FLOAT16_VALUE(14.22) }, + { SIMDE_FLOAT16_VALUE(39.28), SIMDE_FLOAT16_VALUE(24.69), SIMDE_FLOAT16_VALUE(-27.12), SIMDE_FLOAT16_VALUE(-6.34), + SIMDE_FLOAT16_VALUE(30.56), SIMDE_FLOAT16_VALUE(25.21), SIMDE_FLOAT16_VALUE(6.76), SIMDE_FLOAT16_VALUE(17.08) }, + { SIMDE_FLOAT16_VALUE(-0.28), SIMDE_FLOAT16_VALUE(9.51), SIMDE_FLOAT16_VALUE(2.32), SIMDE_FLOAT16_VALUE(1.94), + SIMDE_FLOAT16_VALUE(-2.56), SIMDE_FLOAT16_VALUE(8.19), SIMDE_FLOAT16_VALUE(5.84), SIMDE_FLOAT16_VALUE(5.12) }, + { SIMDE_FLOAT16_VALUE(34.12), SIMDE_FLOAT16_VALUE(22.71), SIMDE_FLOAT16_VALUE(-23.28), SIMDE_FLOAT16_VALUE(-5.26), + SIMDE_FLOAT16_VALUE(26.24), SIMDE_FLOAT16_VALUE(22.99), SIMDE_FLOAT16_VALUE(6.64), SIMDE_FLOAT16_VALUE(15.52) }, + { SIMDE_FLOAT16_VALUE(-76.82), SIMDE_FLOAT16_VALUE(-19.86), SIMDE_FLOAT16_VALUE(59.28), SIMDE_FLOAT16_VALUE(17.96), + SIMDE_FLOAT16_VALUE(-66.64), SIMDE_FLOAT16_VALUE(-24.74), SIMDE_FLOAT16_VALUE(4.06), SIMDE_FLOAT16_VALUE(-18.02) }, + { SIMDE_FLOAT16_VALUE(-63.06), SIMDE_FLOAT16_VALUE(-14.58), SIMDE_FLOAT16_VALUE(49.04), SIMDE_FLOAT16_VALUE(15.08), + SIMDE_FLOAT16_VALUE(-55.12), SIMDE_FLOAT16_VALUE(-18.82), SIMDE_FLOAT16_VALUE(4.38), SIMDE_FLOAT16_VALUE(-13.86) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_float16x8_t v = simde_vld1q_f16(test_vec[i].v); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r0 = simde_vfmsq_laneq_f16(a, b, v, 0); + simde_float16x8_t r1 = simde_vfmsq_laneq_f16(a, b, v, 1); + simde_float16x8_t r2 = simde_vfmsq_laneq_f16(a, b, v, 2); + simde_float16x8_t r3 = simde_vfmsq_laneq_f16(a, b, v, 3); + simde_float16x8_t r4 = simde_vfmsq_laneq_f16(a, b, v, 4); + simde_float16x8_t r5 = simde_vfmsq_laneq_f16(a, b, v, 5); + simde_float16x8_t r6 = simde_vfmsq_laneq_f16(a, b, v, 6); + simde_float16x8_t r7 = simde_vfmsq_laneq_f16(a, b, v, 7); + simde_test_arm_neon_assert_equal_f16x8(r0, simde_vld1q_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x8(r1, simde_vld1q_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x8(r2, simde_vld1q_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x8(r3, simde_vld1q_f16(test_vec[i].r3), 1); + simde_test_arm_neon_assert_equal_f16x8(r4, simde_vld1q_f16(test_vec[i].r4), 1); + simde_test_arm_neon_assert_equal_f16x8(r5, simde_vld1q_f16(test_vec[i].r5), 1); + simde_test_arm_neon_assert_equal_f16x8(r6, simde_vld1q_f16(test_vec[i].r6), 1); + simde_test_arm_neon_assert_equal_f16x8(r7, simde_vld1q_f16(test_vec[i].r7), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[4]; + simde_float32_t b[4]; + simde_float32_t v[4]; + simde_float32_t r0[4]; + simde_float32_t r1[4]; + simde_float32_t r2[4]; + simde_float32_t r3[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(102.82656), SIMDE_FLOAT32_C(951.11129), SIMDE_FLOAT32_C(2.40355), SIMDE_FLOAT32_C(-369.45028) }, + { SIMDE_FLOAT32_C(888.92002), SIMDE_FLOAT32_C(-752.99577), SIMDE_FLOAT32_C(-939.64723), SIMDE_FLOAT32_C(960.22599) }, + { SIMDE_FLOAT32_C(-371.75725), SIMDE_FLOAT32_C(966.75005), SIMDE_FLOAT32_C(-45.49931), SIMDE_FLOAT32_C(-821.43020) }, + 
{ SIMDE_FLOAT32_C(330565.28534), SIMDE_FLOAT32_C(-278980.52517), SIMDE_FLOAT32_C(-349318.26573), SIMDE_FLOAT32_C(356601.51932) }, + { SIMDE_FLOAT32_C(-859260.64451), SIMDE_FLOAT32_C(728909.81394), SIMDE_FLOAT32_C(908406.41405), SIMDE_FLOAT32_C(-928667.97061) }, + { SIMDE_FLOAT32_C(40548.07757), SIMDE_FLOAT32_C(-33309.67992), SIMDE_FLOAT32_C(-42750.90103), SIMDE_FLOAT32_C(43320.17341) }, + { SIMDE_FLOAT32_C(730288.57064), SIMDE_FLOAT32_C(-617582.35546), SIMDE_FLOAT32_C(-771852.20821), SIMDE_FLOAT32_C(788389.17004) } }, + { { SIMDE_FLOAT32_C(-291.65071), SIMDE_FLOAT32_C(-154.24511), SIMDE_FLOAT32_C(527.37848), SIMDE_FLOAT32_C(-472.25705) }, + { SIMDE_FLOAT32_C(-390.25343), SIMDE_FLOAT32_C(-977.99004), SIMDE_FLOAT32_C(-648.10719), SIMDE_FLOAT32_C(-638.11574) }, + { SIMDE_FLOAT32_C(241.11814), SIMDE_FLOAT32_C(-231.93603), SIMDE_FLOAT32_C(5.65139), SIMDE_FLOAT32_C(808.77716) }, + { SIMDE_FLOAT32_C(93805.53156), SIMDE_FLOAT32_C(235656.89555), SIMDE_FLOAT32_C(156797.78003), SIMDE_FLOAT32_C(153389.02404) }, + { SIMDE_FLOAT32_C(-90805.48206), SIMDE_FLOAT32_C(-226985.37115), SIMDE_FLOAT32_C(-149792.02989), SIMDE_FLOAT32_C(-148474.28760) }, + { SIMDE_FLOAT32_C(1913.82518), SIMDE_FLOAT32_C(5372.76190), SIMDE_FLOAT32_C(4190.08756), SIMDE_FLOAT32_C(3133.98639) }, + { SIMDE_FLOAT32_C(315336.41460), SIMDE_FLOAT32_C(790821.76825), SIMDE_FLOAT32_C(524701.67695), SIMDE_FLOAT32_C(515621.18274) } }, + { { SIMDE_FLOAT32_C(-479.10655), SIMDE_FLOAT32_C(450.71118), SIMDE_FLOAT32_C(-184.72924), SIMDE_FLOAT32_C(-676.51379) }, + { SIMDE_FLOAT32_C(-275.05178), SIMDE_FLOAT32_C(688.51605), SIMDE_FLOAT32_C(-579.44131), SIMDE_FLOAT32_C(565.66724) }, + { SIMDE_FLOAT32_C(244.78459), SIMDE_FLOAT32_C(355.79255), SIMDE_FLOAT32_C(-611.21600), SIMDE_FLOAT32_C(-983.17422) }, + { SIMDE_FLOAT32_C(66849.33234), SIMDE_FLOAT32_C(-168087.41123), SIMDE_FLOAT32_C(141653.57685), SIMDE_FLOAT32_C(-139143.14072) }, + { SIMDE_FLOAT32_C(97382.26797), SIMDE_FLOAT32_C(-244518.16959), SIMDE_FLOAT32_C(205976.17132), 
SIMDE_FLOAT32_C(-201936.70429) }, + { SIMDE_FLOAT32_C(-168595.15677), SIMDE_FLOAT32_C(421282.73879), SIMDE_FLOAT32_C(-354348.52963), SIMDE_FLOAT32_C(345068.35705) }, + { SIMDE_FLOAT32_C(-270902.92683), SIMDE_FLOAT32_C(677381.94084), SIMDE_FLOAT32_C(-569876.48550), SIMDE_FLOAT32_C(555472.93590) } }, + { { SIMDE_FLOAT32_C(598.94398), SIMDE_FLOAT32_C(-678.76233), SIMDE_FLOAT32_C(525.14756), SIMDE_FLOAT32_C(-393.56775) }, + { SIMDE_FLOAT32_C(270.63878), SIMDE_FLOAT32_C(62.33552), SIMDE_FLOAT32_C(532.73637), SIMDE_FLOAT32_C(-53.87204) }, + { SIMDE_FLOAT32_C(-402.56303), SIMDE_FLOAT32_C(-875.39401), SIMDE_FLOAT32_C(-525.16995), SIMDE_FLOAT32_C(0.83003) }, + { SIMDE_FLOAT32_C(109548.11225), SIMDE_FLOAT32_C(24415.21183), SIMDE_FLOAT32_C(214985.11298), SIMDE_FLOAT32_C(-22080.45814) }, + { SIMDE_FLOAT32_C(237514.51413), SIMDE_FLOAT32_C(53889.37518), SIMDE_FLOAT32_C(466879.37302), SIMDE_FLOAT32_C(-47552.82637) }, + { SIMDE_FLOAT32_C(142730.30110), SIMDE_FLOAT32_C(32057.97775), SIMDE_FLOAT32_C(280302.28049), SIMDE_FLOAT32_C(-28685.54292) }, + { SIMDE_FLOAT32_C(374.30701), SIMDE_FLOAT32_C(-730.50237), SIMDE_FLOAT32_C(82.96301), SIMDE_FLOAT32_C(-348.85261) } }, + { { SIMDE_FLOAT32_C(-230.44369), SIMDE_FLOAT32_C(-546.43451), SIMDE_FLOAT32_C(-247.60873), SIMDE_FLOAT32_C(644.18364) }, + { SIMDE_FLOAT32_C(-992.70944), SIMDE_FLOAT32_C(100.10228), SIMDE_FLOAT32_C(892.71396), SIMDE_FLOAT32_C(179.76280) }, + { SIMDE_FLOAT32_C(-390.46056), SIMDE_FLOAT32_C(-238.49889), SIMDE_FLOAT32_C(-25.44016), SIMDE_FLOAT32_C(716.38644) }, + { SIMDE_FLOAT32_C(-387844.33013), SIMDE_FLOAT32_C(38539.55701), SIMDE_FLOAT32_C(348321.98658), SIMDE_FLOAT32_C(70834.46651) }, + { SIMDE_FLOAT32_C(-236990.53963), SIMDE_FLOAT32_C(23327.84715), SIMDE_FLOAT32_C(212663.67675), SIMDE_FLOAT32_C(43517.41055) }, + { SIMDE_FLOAT32_C(-25485.13297), SIMDE_FLOAT32_C(2000.18367), SIMDE_FLOAT32_C(22463.17933), SIMDE_FLOAT32_C(5217.37838) }, + { SIMDE_FLOAT32_C(710933.13081), SIMDE_FLOAT32_C(-72258.34787), 
SIMDE_FLOAT32_C(-639775.77848), SIMDE_FLOAT32_C(-128135.44528) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t v = simde_vld1q_f32(test_vec[i].v); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r0 = simde_vfmsq_laneq_f32(a, b, v, 0); + simde_float32x4_t r1 = simde_vfmsq_laneq_f32(a, b, v, 1); + simde_float32x4_t r2 = simde_vfmsq_laneq_f32(a, b, v, 2); + simde_float32x4_t r3 = simde_vfmsq_laneq_f32(a, b, v, 3); + simde_test_arm_neon_assert_equal_f32x4(r0, simde_vld1q_f32(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f32x4(r1, simde_vld1q_f32(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f32x4(r2, simde_vld1q_f32(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f32x4(r3, simde_vld1q_f32(test_vec[i].r3), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[2]; + simde_float64_t b[2]; + simde_float64_t v[2]; + simde_float64_t r0[2]; + simde_float64_t r1[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(2111.40644), SIMDE_FLOAT64_C(147.93113) }, + { SIMDE_FLOAT64_C(3281.30423), SIMDE_FLOAT64_C(-5219.67760) }, + { SIMDE_FLOAT64_C(3585.45315), SIMDE_FLOAT64_C(4506.84828) }, + { SIMDE_FLOAT64_C(-11762851.19121), SIMDE_FLOAT64_C(18715057.43867) }, + { SIMDE_FLOAT64_C(-14786228.90247), SIMDE_FLOAT64_C(23524442.91725) } }, + { { SIMDE_FLOAT64_C(-4250.80606), SIMDE_FLOAT64_C(9340.30783) }, + { SIMDE_FLOAT64_C(-7342.00142), SIMDE_FLOAT64_C(-727.71775) }, + { SIMDE_FLOAT64_C(4396.54168), SIMDE_FLOAT64_C(-6058.97674) }, + { SIMDE_FLOAT64_C(32275164.39840), SIMDE_FLOAT64_C(3208781.71167) }, + { SIMDE_FLOAT64_C(-44489266.61039), SIMDE_FLOAT64_C(-4399884.59644) } }, + { { SIMDE_FLOAT64_C(959.96052), SIMDE_FLOAT64_C(-3408.84969) }, + { SIMDE_FLOAT64_C(-93.01932), SIMDE_FLOAT64_C(-346.20674) }, + { SIMDE_FLOAT64_C(3686.43292), 
SIMDE_FLOAT64_C(-4978.44355) }, + { SIMDE_FLOAT64_C(343869.43298), SIMDE_FLOAT64_C(1272859.08246) }, + { SIMDE_FLOAT64_C(-462131.45820), SIMDE_FLOAT64_C(-1726979.57269) } }, + { { SIMDE_FLOAT64_C(-4180.69522), SIMDE_FLOAT64_C(3331.97653) }, + { SIMDE_FLOAT64_C(215.33179), SIMDE_FLOAT64_C(4094.47228) }, + { SIMDE_FLOAT64_C(-7142.91683), SIMDE_FLOAT64_C(-4136.77211) }, + { SIMDE_FLOAT64_C(1533916.38871), SIMDE_FLOAT64_C(29249806.96093) }, + { SIMDE_FLOAT64_C(886597.85719), SIMDE_FLOAT64_C(16941230.70986) } }, + { { SIMDE_FLOAT64_C(-2499.69552), SIMDE_FLOAT64_C(4103.78549) }, + { SIMDE_FLOAT64_C(-6459.94724), SIMDE_FLOAT64_C(5532.81151) }, + { SIMDE_FLOAT64_C(4249.54527), SIMDE_FLOAT64_C(1730.69927) }, + { SIMDE_FLOAT64_C(27449338.55907), SIMDE_FLOAT64_C(-23507829.21384) }, + { SIMDE_FLOAT64_C(11177726.29310), SIMDE_FLOAT64_C(-9571529.07100) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t v = simde_vld1q_f64(test_vec[i].v); + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_float64x2_t r0 = simde_vfmsq_laneq_f64(a, b, v, 0); + simde_float64x2_t r1 = simde_vfmsq_laneq_f64(a, b, v, 1); + simde_test_arm_neon_assert_equal_f64x2(r0, simde_vld1q_f64(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f64x2(r1, simde_vld1q_f64(test_vec[i].r1), 1); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsd_lane_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsd_laneq_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfmss_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmss_laneq_f32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsh_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsh_laneq_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_lane_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_laneq_f64) + 
+SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_lane_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_laneq_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/fms_n.c b/test/arm/neon/fms_n.c new file mode 100644 index 000000000..27c5301a0 --- /dev/null +++ b/test/arm/neon/fms_n.c @@ -0,0 +1,278 @@ +#define SIMDE_TEST_ARM_NEON_INSN fms_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/fms_n.h" + +static int +test_simde_vfms_n_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 c; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-4.51), SIMDE_FLOAT16_VALUE(-3.15), SIMDE_FLOAT16_VALUE(4.65), SIMDE_FLOAT16_VALUE(-2.79) }, + { SIMDE_FLOAT16_VALUE(1.88), SIMDE_FLOAT16_VALUE(2.18), SIMDE_FLOAT16_VALUE(-5.14), SIMDE_FLOAT16_VALUE(6.04) }, + SIMDE_FLOAT16_VALUE(8.46), + { SIMDE_FLOAT16_VALUE(-20.42), SIMDE_FLOAT16_VALUE(-21.56), SIMDE_FLOAT16_VALUE(48.18), SIMDE_FLOAT16_VALUE(-53.91) } }, + { { SIMDE_FLOAT16_VALUE(-8.79), SIMDE_FLOAT16_VALUE(7.93), SIMDE_FLOAT16_VALUE(6.44), SIMDE_FLOAT16_VALUE(3.93) }, + { SIMDE_FLOAT16_VALUE(8.41), SIMDE_FLOAT16_VALUE(-1.86), SIMDE_FLOAT16_VALUE(-8.29), SIMDE_FLOAT16_VALUE(6.98) }, + SIMDE_FLOAT16_VALUE(-6.90), + { SIMDE_FLOAT16_VALUE(49.22), SIMDE_FLOAT16_VALUE(-4.89), SIMDE_FLOAT16_VALUE(-50.75), SIMDE_FLOAT16_VALUE(52.13) } }, + { { SIMDE_FLOAT16_VALUE(-5.03), SIMDE_FLOAT16_VALUE(6.91), SIMDE_FLOAT16_VALUE(-1.39), SIMDE_FLOAT16_VALUE(-0.79) }, + { SIMDE_FLOAT16_VALUE(9.89), SIMDE_FLOAT16_VALUE(6.99), SIMDE_FLOAT16_VALUE(8.53), SIMDE_FLOAT16_VALUE(-2.51) }, + SIMDE_FLOAT16_VALUE(2.63), + { SIMDE_FLOAT16_VALUE(-31.05), SIMDE_FLOAT16_VALUE(-11.49), SIMDE_FLOAT16_VALUE(-23.83), SIMDE_FLOAT16_VALUE(5.81) } }, + { { SIMDE_FLOAT16_VALUE(-6.05), SIMDE_FLOAT16_VALUE(-8.23), 
SIMDE_FLOAT16_VALUE(0.70), SIMDE_FLOAT16_VALUE(-1.65) }, + { SIMDE_FLOAT16_VALUE(-5.28), SIMDE_FLOAT16_VALUE(-9.29), SIMDE_FLOAT16_VALUE(0.62), SIMDE_FLOAT16_VALUE(-4.37) }, + SIMDE_FLOAT16_VALUE(7.61), + { SIMDE_FLOAT16_VALUE(34.19), SIMDE_FLOAT16_VALUE(62.51), SIMDE_FLOAT16_VALUE(-4.01), SIMDE_FLOAT16_VALUE(31.65) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16 c = test_vec[i].c; + simde_float16x4_t r = simde_vfms_n_f16(a, b, c); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_n_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 c; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-0.26), SIMDE_FLOAT16_VALUE(1.88), SIMDE_FLOAT16_VALUE(-3.00), SIMDE_FLOAT16_VALUE(-4.73), + SIMDE_FLOAT16_VALUE(-5.26), SIMDE_FLOAT16_VALUE(3.81), SIMDE_FLOAT16_VALUE(5.67), SIMDE_FLOAT16_VALUE(8.36) }, + { SIMDE_FLOAT16_VALUE(-2.29), SIMDE_FLOAT16_VALUE(-8.62), SIMDE_FLOAT16_VALUE(9.46), SIMDE_FLOAT16_VALUE(-3.37), + SIMDE_FLOAT16_VALUE(2.94), SIMDE_FLOAT16_VALUE(-5.91), SIMDE_FLOAT16_VALUE(-0.56), SIMDE_FLOAT16_VALUE(1.85) }, + SIMDE_FLOAT16_VALUE(-4.07), + { SIMDE_FLOAT16_VALUE(-9.56), SIMDE_FLOAT16_VALUE(-33.16), SIMDE_FLOAT16_VALUE(35.46), SIMDE_FLOAT16_VALUE(-18.41), + SIMDE_FLOAT16_VALUE(6.71), SIMDE_FLOAT16_VALUE(-20.22), SIMDE_FLOAT16_VALUE(3.39), SIMDE_FLOAT16_VALUE(15.91) } }, + { { SIMDE_FLOAT16_VALUE(-7.17), SIMDE_FLOAT16_VALUE(2.26), SIMDE_FLOAT16_VALUE(-4.52), SIMDE_FLOAT16_VALUE(-3.35), + SIMDE_FLOAT16_VALUE(0.91), SIMDE_FLOAT16_VALUE(6.24), SIMDE_FLOAT16_VALUE(7.97), SIMDE_FLOAT16_VALUE(-2.56) }, + { SIMDE_FLOAT16_VALUE(-6.02), SIMDE_FLOAT16_VALUE(-6.45), SIMDE_FLOAT16_VALUE(-0.61), SIMDE_FLOAT16_VALUE(-1.14), + SIMDE_FLOAT16_VALUE(-1.96), 
SIMDE_FLOAT16_VALUE(6.05), SIMDE_FLOAT16_VALUE(1.48), SIMDE_FLOAT16_VALUE(3.90) }, + SIMDE_FLOAT16_VALUE(-3.01), + { SIMDE_FLOAT16_VALUE(-25.27), SIMDE_FLOAT16_VALUE(-17.14), SIMDE_FLOAT16_VALUE(-6.36), SIMDE_FLOAT16_VALUE(-6.77), + SIMDE_FLOAT16_VALUE(-4.98), SIMDE_FLOAT16_VALUE(24.43), SIMDE_FLOAT16_VALUE(12.41), SIMDE_FLOAT16_VALUE(9.17) } }, + { { SIMDE_FLOAT16_VALUE(7.80), SIMDE_FLOAT16_VALUE(5.47), SIMDE_FLOAT16_VALUE(-3.01), SIMDE_FLOAT16_VALUE(-9.75), + SIMDE_FLOAT16_VALUE(7.74), SIMDE_FLOAT16_VALUE(2.65), SIMDE_FLOAT16_VALUE(2.24), SIMDE_FLOAT16_VALUE(6.66) }, + { SIMDE_FLOAT16_VALUE(-3.26), SIMDE_FLOAT16_VALUE(6.55), SIMDE_FLOAT16_VALUE(4.39), SIMDE_FLOAT16_VALUE(2.46), + SIMDE_FLOAT16_VALUE(5.08), SIMDE_FLOAT16_VALUE(-4.99), SIMDE_FLOAT16_VALUE(3.28), SIMDE_FLOAT16_VALUE(2.77) }, + SIMDE_FLOAT16_VALUE(-8.81), + { SIMDE_FLOAT16_VALUE(-20.90), SIMDE_FLOAT16_VALUE(63.19), SIMDE_FLOAT16_VALUE(35.66), SIMDE_FLOAT16_VALUE(11.96), + SIMDE_FLOAT16_VALUE(52.54), SIMDE_FLOAT16_VALUE(-41.37), SIMDE_FLOAT16_VALUE(31.15), SIMDE_FLOAT16_VALUE(31.03) } }, + { { SIMDE_FLOAT16_VALUE(6.93), SIMDE_FLOAT16_VALUE(9.09), SIMDE_FLOAT16_VALUE(-4.77), SIMDE_FLOAT16_VALUE(4.57), + SIMDE_FLOAT16_VALUE(-7.31), SIMDE_FLOAT16_VALUE(9.96), SIMDE_FLOAT16_VALUE(-0.37), SIMDE_FLOAT16_VALUE(-9.60) }, + { SIMDE_FLOAT16_VALUE(5.24), SIMDE_FLOAT16_VALUE(0.24), SIMDE_FLOAT16_VALUE(1.86), SIMDE_FLOAT16_VALUE(-5.77), + SIMDE_FLOAT16_VALUE(7.31), SIMDE_FLOAT16_VALUE(4.13), SIMDE_FLOAT16_VALUE(5.84), SIMDE_FLOAT16_VALUE(-6.57) }, + SIMDE_FLOAT16_VALUE(7.40), + { SIMDE_FLOAT16_VALUE(-31.91), SIMDE_FLOAT16_VALUE(7.33), SIMDE_FLOAT16_VALUE(-18.57), SIMDE_FLOAT16_VALUE(47.32), + SIMDE_FLOAT16_VALUE(-61.41), SIMDE_FLOAT16_VALUE(-20.61), SIMDE_FLOAT16_VALUE(-43.59), SIMDE_FLOAT16_VALUE(39.08) } }, + { { SIMDE_FLOAT16_VALUE(-1.88), SIMDE_FLOAT16_VALUE(8.86), SIMDE_FLOAT16_VALUE(0.06), SIMDE_FLOAT16_VALUE(-10.00), + SIMDE_FLOAT16_VALUE(-5.39), SIMDE_FLOAT16_VALUE(1.98), SIMDE_FLOAT16_VALUE(-0.77), 
SIMDE_FLOAT16_VALUE(-2.07) }, + { SIMDE_FLOAT16_VALUE(9.69), SIMDE_FLOAT16_VALUE(6.76), SIMDE_FLOAT16_VALUE(-6.89), SIMDE_FLOAT16_VALUE(-8.13), + SIMDE_FLOAT16_VALUE(1.66), SIMDE_FLOAT16_VALUE(-1.01), SIMDE_FLOAT16_VALUE(-7.80), SIMDE_FLOAT16_VALUE(-3.81) }, + SIMDE_FLOAT16_VALUE(-0.02), + { SIMDE_FLOAT16_VALUE(-1.72), SIMDE_FLOAT16_VALUE(8.97), SIMDE_FLOAT16_VALUE(-0.05), SIMDE_FLOAT16_VALUE(-10.13), + SIMDE_FLOAT16_VALUE(-5.36), SIMDE_FLOAT16_VALUE(1.97), SIMDE_FLOAT16_VALUE(-0.90), SIMDE_FLOAT16_VALUE(-2.13) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16 c = test_vec[i].c; + simde_float16x8_t r = simde_vfmsq_n_f16(a, b, c); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfms_n_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[2]; + simde_float32 b[2]; + simde_float32 c; + simde_float32 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-35.57796), SIMDE_FLOAT32_C(-59.85786) }, + { SIMDE_FLOAT32_C(17.28136), SIMDE_FLOAT32_C(84.77423) }, + SIMDE_FLOAT32_C(-45.20424), + { SIMDE_FLOAT32_C(745.61292), SIMDE_FLOAT32_C(3772.29681) } }, + { { SIMDE_FLOAT32_C(-17.99824), SIMDE_FLOAT32_C(30.49420) }, + { SIMDE_FLOAT32_C(-20.95882), SIMDE_FLOAT32_C(25.34221) }, + SIMDE_FLOAT32_C(45.99718), + { SIMDE_FLOAT32_C(946.04844), SIMDE_FLOAT32_C(-1135.17594) } }, + { { SIMDE_FLOAT32_C(62.43086), SIMDE_FLOAT32_C(-77.10995) }, + { SIMDE_FLOAT32_C(9.37372), SIMDE_FLOAT32_C(99.88405) }, + SIMDE_FLOAT32_C(-86.22524), + { SIMDE_FLOAT32_C(870.68200), SIMDE_FLOAT32_C(8535.41651) } }, + { { SIMDE_FLOAT32_C(99.02511), SIMDE_FLOAT32_C(72.49425) }, + { SIMDE_FLOAT32_C(22.97394), SIMDE_FLOAT32_C(-52.48535) }, + SIMDE_FLOAT32_C(39.95098), + { SIMDE_FLOAT32_C(-818.80616), SIMDE_FLOAT32_C(2169.33519) } }, + { { 
SIMDE_FLOAT32_C(41.76455), SIMDE_FLOAT32_C(-70.21716) }, + { SIMDE_FLOAT32_C(-33.65310), SIMDE_FLOAT32_C(3.84507) }, + SIMDE_FLOAT32_C(56.99074), + { SIMDE_FLOAT32_C(1959.67983), SIMDE_FLOAT32_C(-289.35081) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32 c = test_vec[i].c; + simde_float32x2_t r = simde_vfms_n_f32(a, b, c); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfms_n_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[1]; + simde_float64 b[1]; + simde_float64 c; + simde_float64 r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-337.20280) }, + { SIMDE_FLOAT64_C(929.21901) }, + SIMDE_FLOAT64_C(-356.58915), + { SIMDE_FLOAT64_C(331012.21427) } }, + { { SIMDE_FLOAT64_C(264.59177) }, + { SIMDE_FLOAT64_C(46.08190) }, + SIMDE_FLOAT64_C(-602.18916), + { SIMDE_FLOAT64_C(28014.61335) } }, + { { SIMDE_FLOAT64_C(-652.94501) }, + { SIMDE_FLOAT64_C(83.67625) }, + SIMDE_FLOAT64_C(607.85645), + { SIMDE_FLOAT64_C(-51516.09546) } }, + { { SIMDE_FLOAT64_C(-359.35028) }, + { SIMDE_FLOAT64_C(-580.59986) }, + SIMDE_FLOAT64_C(254.44176), + { SIMDE_FLOAT64_C(147369.49847) } }, + { { SIMDE_FLOAT64_C(952.75267) }, + { SIMDE_FLOAT64_C(405.32469) }, + SIMDE_FLOAT64_C(781.41408), + { SIMDE_FLOAT64_C(-315773.66520) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + simde_float64 c = test_vec[i].c; + simde_float64x1_t r = simde_vfms_n_f64(a, b, c); + + simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_n_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32 a[4]; + simde_float32 b[4]; + 
simde_float32 c; + simde_float32 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-76.22244), SIMDE_FLOAT32_C(-7.91594), SIMDE_FLOAT32_C(-21.59222), SIMDE_FLOAT32_C(-84.47692) }, + { SIMDE_FLOAT32_C(88.23311), SIMDE_FLOAT32_C(81.35067), SIMDE_FLOAT32_C(69.16915), SIMDE_FLOAT32_C(-74.96321) }, + SIMDE_FLOAT32_C(-18.21288), + { SIMDE_FLOAT32_C(1530.75616), SIMDE_FLOAT32_C(1473.71374), SIMDE_FLOAT32_C(1238.17683), SIMDE_FLOAT32_C(-1449.77260) } }, + { { SIMDE_FLOAT32_C(75.52575), SIMDE_FLOAT32_C(-81.92949), SIMDE_FLOAT32_C(-11.90210), SIMDE_FLOAT32_C(87.65228) }, + { SIMDE_FLOAT32_C(65.27611), SIMDE_FLOAT32_C(20.43275), SIMDE_FLOAT32_C(-1.91278), SIMDE_FLOAT32_C(-50.01227) }, + SIMDE_FLOAT32_C(0.51320), + { SIMDE_FLOAT32_C(42.02623), SIMDE_FLOAT32_C(-92.41553), SIMDE_FLOAT32_C(-10.92047), SIMDE_FLOAT32_C(113.31844) } }, + { { SIMDE_FLOAT32_C(66.81045), SIMDE_FLOAT32_C(18.76226), SIMDE_FLOAT32_C(-56.30699), SIMDE_FLOAT32_C(-50.25232) }, + { SIMDE_FLOAT32_C(75.53020), SIMDE_FLOAT32_C(45.16634), SIMDE_FLOAT32_C(-50.05386), SIMDE_FLOAT32_C(-97.11726) }, + SIMDE_FLOAT32_C(94.17368), + { SIMDE_FLOAT32_C(-7046.14671), SIMDE_FLOAT32_C(-4234.71828), SIMDE_FLOAT32_C(4657.44900), SIMDE_FLOAT32_C(9095.63779) } }, + { { SIMDE_FLOAT32_C(-57.65770), SIMDE_FLOAT32_C(-33.06791), SIMDE_FLOAT32_C(68.12496), SIMDE_FLOAT32_C(-28.74269) }, + { SIMDE_FLOAT32_C(99.08967), SIMDE_FLOAT32_C(-9.21892), SIMDE_FLOAT32_C(2.91902), SIMDE_FLOAT32_C(35.54978) }, + SIMDE_FLOAT32_C(-93.38670), + { SIMDE_FLOAT32_C(9195.99960), SIMDE_FLOAT32_C(-893.99220), SIMDE_FLOAT32_C(340.72239), SIMDE_FLOAT32_C(3291.13349) } }, + { { SIMDE_FLOAT32_C(26.08616), SIMDE_FLOAT32_C(-31.72849), SIMDE_FLOAT32_C(19.87012), SIMDE_FLOAT32_C(96.24746) }, + { SIMDE_FLOAT32_C(76.20837), SIMDE_FLOAT32_C(47.45190), SIMDE_FLOAT32_C(22.96593), SIMDE_FLOAT32_C(88.46029) }, + SIMDE_FLOAT32_C(-49.17426), + { SIMDE_FLOAT32_C(3773.57590), SIMDE_FLOAT32_C(2301.68334), SIMDE_FLOAT32_C(1149.20249), SIMDE_FLOAT32_C(4446.21640) } }, + }; + + for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32 c = test_vec[i].c; + simde_float32x4_t r = simde_vfmsq_n_f32(a, b, c); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vfmsq_n_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[2]; + simde_float64 b[2]; + simde_float64 c; + simde_float64 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(338.30974), SIMDE_FLOAT64_C(488.97921) }, + { SIMDE_FLOAT64_C(-590.49049), SIMDE_FLOAT64_C(-185.60857) }, + SIMDE_FLOAT64_C(367.60822), + { SIMDE_FLOAT64_C(217407.47232), SIMDE_FLOAT64_C(68720.21765) } }, + { { SIMDE_FLOAT64_C(-790.00195), SIMDE_FLOAT64_C(-762.94472) }, + { SIMDE_FLOAT64_C(-496.73977), SIMDE_FLOAT64_C(86.61846) }, + SIMDE_FLOAT64_C(-65.57304), + { SIMDE_FLOAT64_C(-33362.73803), SIMDE_FLOAT64_C(4916.89121) } }, + { { SIMDE_FLOAT64_C(235.45170), SIMDE_FLOAT64_C(-183.40964) }, + { SIMDE_FLOAT64_C(-591.37023), SIMDE_FLOAT64_C(-176.48337) }, + SIMDE_FLOAT64_C(-197.75591), + { SIMDE_FLOAT64_C(-116711.50776), SIMDE_FLOAT64_C(-35084.04028) } }, + { { SIMDE_FLOAT64_C(911.19764), SIMDE_FLOAT64_C(-667.08555) }, + { SIMDE_FLOAT64_C(-241.31170), SIMDE_FLOAT64_C(-569.81857) }, + SIMDE_FLOAT64_C(-413.42624), + { SIMDE_FLOAT64_C(-98853.39065), SIMDE_FLOAT64_C(-236245.03073) } }, + { { SIMDE_FLOAT64_C(173.38963), SIMDE_FLOAT64_C(-355.04043) }, + { SIMDE_FLOAT64_C(471.15737), SIMDE_FLOAT64_C(779.68624) }, + SIMDE_FLOAT64_C(-562.91794), + { SIMDE_FLOAT64_C(265396.32192), SIMDE_FLOAT64_C(438544.33004) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_float64 c = test_vec[i].c; + simde_float64x2_t r = simde_vfmsq_n_f64(a, b, c); + + 
simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_n_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_n_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfms_n_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_n_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_n_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vfmsq_n_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld1_dup.c b/test/arm/neon/ld1_dup.c index ebe7f9a2f..a39ef608a 100644 --- a/test/arm/neon/ld1_dup.c +++ b/test/arm/neon/ld1_dup.c @@ -5,6 +5,40 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t unused; + simde_float16_t r[4]; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(4.04), + SIMDE_FLOAT16_VALUE(4.51), + { SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(4.04) } }, + { SIMDE_FLOAT16_VALUE(-0.29), + SIMDE_FLOAT16_VALUE(8.79), + { SIMDE_FLOAT16_VALUE(-0.29), SIMDE_FLOAT16_VALUE(-0.29), SIMDE_FLOAT16_VALUE(-0.29), SIMDE_FLOAT16_VALUE(-0.29) } }, + { SIMDE_FLOAT16_VALUE(-1.66), + SIMDE_FLOAT16_VALUE(-8.54), + { SIMDE_FLOAT16_VALUE(-1.66), SIMDE_FLOAT16_VALUE(-1.66), SIMDE_FLOAT16_VALUE(-1.66), SIMDE_FLOAT16_VALUE(-1.66) } }, + { SIMDE_FLOAT16_VALUE(-7.87), + SIMDE_FLOAT16_VALUE(2.04), + { SIMDE_FLOAT16_VALUE(-7.87), SIMDE_FLOAT16_VALUE(-7.87), SIMDE_FLOAT16_VALUE(-7.87), SIMDE_FLOAT16_VALUE(-7.87) } }, + { SIMDE_FLOAT16_VALUE(2.65), + SIMDE_FLOAT16_VALUE(-7.48), + { SIMDE_FLOAT16_VALUE(2.65), SIMDE_FLOAT16_VALUE(2.65), SIMDE_FLOAT16_VALUE(2.65), SIMDE_FLOAT16_VALUE(2.65) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r = simde_vld1_dup_f16(&test_vec[i].a); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), + INT_MAX); + } + + return 0; +} + static int test_simde_vld1_dup_f32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1228,6 +1262,7 @@ test_simde_vld1q_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1_dup_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_dup_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_dup_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_dup_s8) diff --git a/test/arm/neon/ld1_lane.c b/test/arm/neon/ld1_lane.c index 50cea9d9f..16573ce09 100644 --- a/test/arm/neon/ld1_lane.c +++ b/test/arm/neon/ld1_lane.c @@ -383,6 +383,41 @@ test_simde_vld1_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vld1_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[4]; + simde_float16_t buf; + simde_float16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-6.89), SIMDE_FLOAT16_VALUE(-8.34), SIMDE_FLOAT16_VALUE(1.02), SIMDE_FLOAT16_VALUE(-3.31) }, + SIMDE_FLOAT16_VALUE(-8.93), + { SIMDE_FLOAT16_VALUE(-8.93), SIMDE_FLOAT16_VALUE(-8.34), SIMDE_FLOAT16_VALUE(1.02), SIMDE_FLOAT16_VALUE(-3.31) } }, + { { SIMDE_FLOAT16_VALUE(8.46), SIMDE_FLOAT16_VALUE(9.18), SIMDE_FLOAT16_VALUE(-5.63), SIMDE_FLOAT16_VALUE(-7.46) }, + SIMDE_FLOAT16_VALUE(4.92), + { SIMDE_FLOAT16_VALUE(8.46), SIMDE_FLOAT16_VALUE(4.92), SIMDE_FLOAT16_VALUE(-5.63), SIMDE_FLOAT16_VALUE(-7.46) } }, + { { SIMDE_FLOAT16_VALUE(4.00), SIMDE_FLOAT16_VALUE(-5.13), SIMDE_FLOAT16_VALUE(7.28), SIMDE_FLOAT16_VALUE(4.93) }, + SIMDE_FLOAT16_VALUE(2.30), + { SIMDE_FLOAT16_VALUE(4.00), SIMDE_FLOAT16_VALUE(-5.13), SIMDE_FLOAT16_VALUE(2.30), SIMDE_FLOAT16_VALUE(4.93) } }, + { { SIMDE_FLOAT16_VALUE(-1.08), SIMDE_FLOAT16_VALUE(-0.44), SIMDE_FLOAT16_VALUE(9.35), SIMDE_FLOAT16_VALUE(7.72) }, + SIMDE_FLOAT16_VALUE(6.05), + { SIMDE_FLOAT16_VALUE(-1.08), SIMDE_FLOAT16_VALUE(-0.44), SIMDE_FLOAT16_VALUE(9.35), SIMDE_FLOAT16_VALUE(6.05) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r, src, expected; + src = simde_vld1_f16(test_vec[i].src); + + 
SIMDE_CONSTIFY_4_(simde_vld1_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, &test_vec[i].buf, src); + + expected = simde_vld1_f16(test_vec[i].r); + + simde_test_arm_neon_assert_equal_f16x4(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -991,6 +1026,69 @@ test_simde_vld1q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vld1q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[8]; + simde_float16_t buf; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-7.05), SIMDE_FLOAT16_VALUE(6.67), SIMDE_FLOAT16_VALUE(6.59), SIMDE_FLOAT16_VALUE(3.75), + SIMDE_FLOAT16_VALUE(-9.33), SIMDE_FLOAT16_VALUE(3.01), SIMDE_FLOAT16_VALUE(-8.25), SIMDE_FLOAT16_VALUE(0.24) }, + SIMDE_FLOAT16_VALUE(-6.92), + { SIMDE_FLOAT16_VALUE(-6.92), SIMDE_FLOAT16_VALUE(6.67), SIMDE_FLOAT16_VALUE(6.59), SIMDE_FLOAT16_VALUE(3.75), + SIMDE_FLOAT16_VALUE(-9.33), SIMDE_FLOAT16_VALUE(3.01), SIMDE_FLOAT16_VALUE(-8.25), SIMDE_FLOAT16_VALUE(0.24) } }, + { { SIMDE_FLOAT16_VALUE(0.08), SIMDE_FLOAT16_VALUE(-9.60), SIMDE_FLOAT16_VALUE(1.90), SIMDE_FLOAT16_VALUE(9.42), + SIMDE_FLOAT16_VALUE(-7.19), SIMDE_FLOAT16_VALUE(9.43), SIMDE_FLOAT16_VALUE(9.22), SIMDE_FLOAT16_VALUE(-8.41) }, + SIMDE_FLOAT16_VALUE(8.22), + { SIMDE_FLOAT16_VALUE(0.08), SIMDE_FLOAT16_VALUE(8.22), SIMDE_FLOAT16_VALUE(1.90), SIMDE_FLOAT16_VALUE(9.42), + SIMDE_FLOAT16_VALUE(-7.19), SIMDE_FLOAT16_VALUE(9.43), SIMDE_FLOAT16_VALUE(9.22), SIMDE_FLOAT16_VALUE(-8.41) } }, + { { SIMDE_FLOAT16_VALUE(8.85), SIMDE_FLOAT16_VALUE(0.77), SIMDE_FLOAT16_VALUE(5.98), SIMDE_FLOAT16_VALUE(-6.67), + SIMDE_FLOAT16_VALUE(-9.86), SIMDE_FLOAT16_VALUE(-7.30), SIMDE_FLOAT16_VALUE(-7.33), SIMDE_FLOAT16_VALUE(-9.11) }, + SIMDE_FLOAT16_VALUE(7.84), + { SIMDE_FLOAT16_VALUE(8.85), SIMDE_FLOAT16_VALUE(0.77), SIMDE_FLOAT16_VALUE(7.84), SIMDE_FLOAT16_VALUE(-6.67), + SIMDE_FLOAT16_VALUE(-9.86), SIMDE_FLOAT16_VALUE(-7.30), SIMDE_FLOAT16_VALUE(-7.33), 
SIMDE_FLOAT16_VALUE(-9.11) } }, + { { SIMDE_FLOAT16_VALUE(0.48), SIMDE_FLOAT16_VALUE(-7.75), SIMDE_FLOAT16_VALUE(5.24), SIMDE_FLOAT16_VALUE(0.40), + SIMDE_FLOAT16_VALUE(6.95), SIMDE_FLOAT16_VALUE(4.84), SIMDE_FLOAT16_VALUE(4.63), SIMDE_FLOAT16_VALUE(-7.39) }, + SIMDE_FLOAT16_VALUE(4.37), + { SIMDE_FLOAT16_VALUE(0.48), SIMDE_FLOAT16_VALUE(-7.75), SIMDE_FLOAT16_VALUE(5.24), SIMDE_FLOAT16_VALUE(4.37), + SIMDE_FLOAT16_VALUE(6.95), SIMDE_FLOAT16_VALUE(4.84), SIMDE_FLOAT16_VALUE(4.63), SIMDE_FLOAT16_VALUE(-7.39) } }, + { { SIMDE_FLOAT16_VALUE(-1.01), SIMDE_FLOAT16_VALUE(3.27), SIMDE_FLOAT16_VALUE(6.19), SIMDE_FLOAT16_VALUE(-8.33), + SIMDE_FLOAT16_VALUE(-4.60), SIMDE_FLOAT16_VALUE(-9.70), SIMDE_FLOAT16_VALUE(5.66), SIMDE_FLOAT16_VALUE(-0.67) }, + SIMDE_FLOAT16_VALUE(8.73), + { SIMDE_FLOAT16_VALUE(-1.01), SIMDE_FLOAT16_VALUE(3.27), SIMDE_FLOAT16_VALUE(6.19), SIMDE_FLOAT16_VALUE(-8.33), + SIMDE_FLOAT16_VALUE(8.73), SIMDE_FLOAT16_VALUE(-9.70), SIMDE_FLOAT16_VALUE(5.66), SIMDE_FLOAT16_VALUE(-0.67) } }, + { { SIMDE_FLOAT16_VALUE(7.81), SIMDE_FLOAT16_VALUE(-4.30), SIMDE_FLOAT16_VALUE(-0.94), SIMDE_FLOAT16_VALUE(-1.26), + SIMDE_FLOAT16_VALUE(-7.70), SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(3.75), SIMDE_FLOAT16_VALUE(8.96) }, + SIMDE_FLOAT16_VALUE(4.12), + { SIMDE_FLOAT16_VALUE(7.81), SIMDE_FLOAT16_VALUE(-4.30), SIMDE_FLOAT16_VALUE(-0.94), SIMDE_FLOAT16_VALUE(-1.26), + SIMDE_FLOAT16_VALUE(-7.70), SIMDE_FLOAT16_VALUE(4.12), SIMDE_FLOAT16_VALUE(3.75), SIMDE_FLOAT16_VALUE(8.96) } }, + { { SIMDE_FLOAT16_VALUE(-2.64), SIMDE_FLOAT16_VALUE(0.23), SIMDE_FLOAT16_VALUE(8.32), SIMDE_FLOAT16_VALUE(4.82), + SIMDE_FLOAT16_VALUE(0.39), SIMDE_FLOAT16_VALUE(-9.75), SIMDE_FLOAT16_VALUE(8.11), SIMDE_FLOAT16_VALUE(3.33) }, + SIMDE_FLOAT16_VALUE(-6.51), + { SIMDE_FLOAT16_VALUE(-2.64), SIMDE_FLOAT16_VALUE(0.23), SIMDE_FLOAT16_VALUE(8.32), SIMDE_FLOAT16_VALUE(4.82), + SIMDE_FLOAT16_VALUE(0.39), SIMDE_FLOAT16_VALUE(-9.75), SIMDE_FLOAT16_VALUE(-6.51), SIMDE_FLOAT16_VALUE(3.33) } }, + { { 
SIMDE_FLOAT16_VALUE(-0.09), SIMDE_FLOAT16_VALUE(-8.36), SIMDE_FLOAT16_VALUE(1.34), SIMDE_FLOAT16_VALUE(2.32), + SIMDE_FLOAT16_VALUE(9.15), SIMDE_FLOAT16_VALUE(6.52), SIMDE_FLOAT16_VALUE(-6.82), SIMDE_FLOAT16_VALUE(-4.18) }, + SIMDE_FLOAT16_VALUE(6.98), + { SIMDE_FLOAT16_VALUE(-0.09), SIMDE_FLOAT16_VALUE(-8.36), SIMDE_FLOAT16_VALUE(1.34), SIMDE_FLOAT16_VALUE(2.32), + SIMDE_FLOAT16_VALUE(9.15), SIMDE_FLOAT16_VALUE(6.52), SIMDE_FLOAT16_VALUE(-6.82), SIMDE_FLOAT16_VALUE(6.98) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r, src, expected; + src = simde_vld1q_f16(test_vec[i].src); + + SIMDE_CONSTIFY_8_(simde_vld1q_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, &test_vec[i].buf, src); + + expected = simde_vld1q_f16(test_vec[i].r); + + simde_test_arm_neon_assert_equal_f16x8(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1081,6 +1179,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_s8) @@ -1091,6 +1190,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_lane_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/ld1_x2.c b/test/arm/neon/ld1_x2.c index dc693d237..0f2761373 100644 --- a/test/arm/neon/ld1_x2.c +++ b/test/arm/neon/ld1_x2.c @@ -6,6 +6,46 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1_f16_x2 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 buf[8]; + simde_float16 expected[2][4]; + } 
test_vec[] = { + { { SIMDE_FLOAT16_VALUE(3.81), SIMDE_FLOAT16_VALUE(-2.59), SIMDE_FLOAT16_VALUE(-2.10), SIMDE_FLOAT16_VALUE(-4.19), + SIMDE_FLOAT16_VALUE(-8.46), SIMDE_FLOAT16_VALUE(3.35), SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(-5.62) }, + { { SIMDE_FLOAT16_VALUE(3.81), SIMDE_FLOAT16_VALUE(-2.59), SIMDE_FLOAT16_VALUE(-2.10), SIMDE_FLOAT16_VALUE(-4.19) }, + { SIMDE_FLOAT16_VALUE(-8.46), SIMDE_FLOAT16_VALUE(3.35), SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(-5.62) } } }, + { { SIMDE_FLOAT16_VALUE(5.29), SIMDE_FLOAT16_VALUE(2.79), SIMDE_FLOAT16_VALUE(-5.18), SIMDE_FLOAT16_VALUE(-2.73), + SIMDE_FLOAT16_VALUE(0.13), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(-0.93), SIMDE_FLOAT16_VALUE(-8.59) }, + { { SIMDE_FLOAT16_VALUE(5.29), SIMDE_FLOAT16_VALUE(2.79), SIMDE_FLOAT16_VALUE(-5.18), SIMDE_FLOAT16_VALUE(-2.73) }, + { SIMDE_FLOAT16_VALUE(0.13), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(-0.93), SIMDE_FLOAT16_VALUE(-8.59) } } }, + { { SIMDE_FLOAT16_VALUE(-1.35), SIMDE_FLOAT16_VALUE(6.47), SIMDE_FLOAT16_VALUE(-0.26), SIMDE_FLOAT16_VALUE(-8.64), + SIMDE_FLOAT16_VALUE(1.24), SIMDE_FLOAT16_VALUE(8.31), SIMDE_FLOAT16_VALUE(3.54), SIMDE_FLOAT16_VALUE(2.21) }, + { { SIMDE_FLOAT16_VALUE(-1.35), SIMDE_FLOAT16_VALUE(6.47), SIMDE_FLOAT16_VALUE(-0.26), SIMDE_FLOAT16_VALUE(-8.64) }, + { SIMDE_FLOAT16_VALUE(1.24), SIMDE_FLOAT16_VALUE(8.31), SIMDE_FLOAT16_VALUE(3.54), SIMDE_FLOAT16_VALUE(2.21) } } }, + { { SIMDE_FLOAT16_VALUE(-6.50), SIMDE_FLOAT16_VALUE(-7.07), SIMDE_FLOAT16_VALUE(-6.76), SIMDE_FLOAT16_VALUE(7.01), + SIMDE_FLOAT16_VALUE(1.81), SIMDE_FLOAT16_VALUE(9.02), SIMDE_FLOAT16_VALUE(3.69), SIMDE_FLOAT16_VALUE(-8.59) }, + { { SIMDE_FLOAT16_VALUE(-6.50), SIMDE_FLOAT16_VALUE(-7.07), SIMDE_FLOAT16_VALUE(-6.76), SIMDE_FLOAT16_VALUE(7.01) }, + { SIMDE_FLOAT16_VALUE(1.81), SIMDE_FLOAT16_VALUE(9.02), SIMDE_FLOAT16_VALUE(3.69), SIMDE_FLOAT16_VALUE(-8.59) } } }, + { { SIMDE_FLOAT16_VALUE(-7.98), SIMDE_FLOAT16_VALUE(-1.43), SIMDE_FLOAT16_VALUE(8.33), 
SIMDE_FLOAT16_VALUE(-4.97), + SIMDE_FLOAT16_VALUE(2.58), SIMDE_FLOAT16_VALUE(1.73), SIMDE_FLOAT16_VALUE(0.86), SIMDE_FLOAT16_VALUE(-9.82) }, + { { SIMDE_FLOAT16_VALUE(-7.98), SIMDE_FLOAT16_VALUE(-1.43), SIMDE_FLOAT16_VALUE(8.33), SIMDE_FLOAT16_VALUE(-4.97) }, + { SIMDE_FLOAT16_VALUE(2.58), SIMDE_FLOAT16_VALUE(1.73), SIMDE_FLOAT16_VALUE(0.86), SIMDE_FLOAT16_VALUE(-9.82) } } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x2_t r = simde_vld1_f16_x2(test_vec[i].buf); + simde_float16x4x2_t expected = {{ + simde_vld1_f16(test_vec[i].expected[0]), + simde_vld1_f16(test_vec[i].expected[1]), + }}; + simde_test_arm_neon_assert_equal_f16x4x2(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1_f32_x2 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -670,6 +710,7 @@ test_simde_vld1_u64_x2 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f16_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f32_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f64_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_s8_x2) diff --git a/test/arm/neon/ld1_x3.c b/test/arm/neon/ld1_x3.c index db23ae02b..092ce0445 100644 --- a/test/arm/neon/ld1_x3.c +++ b/test/arm/neon/ld1_x3.c @@ -6,6 +6,57 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1_f16_x3 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 buf[12]; + simde_float16 expected[3][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-5.79), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(9.66), SIMDE_FLOAT16_VALUE(-2.49), + SIMDE_FLOAT16_VALUE(-3.48), SIMDE_FLOAT16_VALUE(2.38), SIMDE_FLOAT16_VALUE(-7.06), SIMDE_FLOAT16_VALUE(-9.89), + SIMDE_FLOAT16_VALUE(-0.42), SIMDE_FLOAT16_VALUE(4.02), SIMDE_FLOAT16_VALUE(1.60), SIMDE_FLOAT16_VALUE(4.63) }, + { { SIMDE_FLOAT16_VALUE(-5.79), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(9.66), SIMDE_FLOAT16_VALUE(-2.49) }, + { SIMDE_FLOAT16_VALUE(-3.48), SIMDE_FLOAT16_VALUE(2.38), SIMDE_FLOAT16_VALUE(-7.06), 
SIMDE_FLOAT16_VALUE(-9.89) }, + { SIMDE_FLOAT16_VALUE(-0.42), SIMDE_FLOAT16_VALUE(4.02), SIMDE_FLOAT16_VALUE(1.60), SIMDE_FLOAT16_VALUE(4.63) } } }, + { { SIMDE_FLOAT16_VALUE(-7.02), SIMDE_FLOAT16_VALUE(1.17), SIMDE_FLOAT16_VALUE(-2.58), SIMDE_FLOAT16_VALUE(-1.99), + SIMDE_FLOAT16_VALUE(8.35), SIMDE_FLOAT16_VALUE(-6.76), SIMDE_FLOAT16_VALUE(-2.81), SIMDE_FLOAT16_VALUE(-8.55), + SIMDE_FLOAT16_VALUE(5.89), SIMDE_FLOAT16_VALUE(-9.99), SIMDE_FLOAT16_VALUE(3.60), SIMDE_FLOAT16_VALUE(-5.29) }, + { { SIMDE_FLOAT16_VALUE(-7.02), SIMDE_FLOAT16_VALUE(1.17), SIMDE_FLOAT16_VALUE(-2.58), SIMDE_FLOAT16_VALUE(-1.99) }, + { SIMDE_FLOAT16_VALUE(8.35), SIMDE_FLOAT16_VALUE(-6.76), SIMDE_FLOAT16_VALUE(-2.81), SIMDE_FLOAT16_VALUE(-8.55) }, + { SIMDE_FLOAT16_VALUE(5.89), SIMDE_FLOAT16_VALUE(-9.99), SIMDE_FLOAT16_VALUE(3.60), SIMDE_FLOAT16_VALUE(-5.29) } } }, + { { SIMDE_FLOAT16_VALUE(-0.56), SIMDE_FLOAT16_VALUE(-4.61), SIMDE_FLOAT16_VALUE(4.12), SIMDE_FLOAT16_VALUE(6.86), + SIMDE_FLOAT16_VALUE(-0.93), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(-4.47), SIMDE_FLOAT16_VALUE(-0.16), + SIMDE_FLOAT16_VALUE(-2.20), SIMDE_FLOAT16_VALUE(9.97), SIMDE_FLOAT16_VALUE(2.92), SIMDE_FLOAT16_VALUE(4.50) }, + { { SIMDE_FLOAT16_VALUE(-0.56), SIMDE_FLOAT16_VALUE(-4.61), SIMDE_FLOAT16_VALUE(4.12), SIMDE_FLOAT16_VALUE(6.86) }, + { SIMDE_FLOAT16_VALUE(-0.93), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(-4.47), SIMDE_FLOAT16_VALUE(-0.16) }, + { SIMDE_FLOAT16_VALUE(-2.20), SIMDE_FLOAT16_VALUE(9.97), SIMDE_FLOAT16_VALUE(2.92), SIMDE_FLOAT16_VALUE(4.50) } } }, + { { SIMDE_FLOAT16_VALUE(5.33), SIMDE_FLOAT16_VALUE(1.10), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(4.80), + SIMDE_FLOAT16_VALUE(-7.40), SIMDE_FLOAT16_VALUE(4.65), SIMDE_FLOAT16_VALUE(9.31), SIMDE_FLOAT16_VALUE(8.79), + SIMDE_FLOAT16_VALUE(-3.82), SIMDE_FLOAT16_VALUE(4.51), SIMDE_FLOAT16_VALUE(-9.93), SIMDE_FLOAT16_VALUE(-6.90) }, + { { SIMDE_FLOAT16_VALUE(5.33), SIMDE_FLOAT16_VALUE(1.10), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(4.80) 
}, + { SIMDE_FLOAT16_VALUE(-7.40), SIMDE_FLOAT16_VALUE(4.65), SIMDE_FLOAT16_VALUE(9.31), SIMDE_FLOAT16_VALUE(8.79) }, + { SIMDE_FLOAT16_VALUE(-3.82), SIMDE_FLOAT16_VALUE(4.51), SIMDE_FLOAT16_VALUE(-9.93), SIMDE_FLOAT16_VALUE(-6.90) } } }, + { { SIMDE_FLOAT16_VALUE(6.23), SIMDE_FLOAT16_VALUE(9.84), SIMDE_FLOAT16_VALUE(1.44), SIMDE_FLOAT16_VALUE(-9.15), + SIMDE_FLOAT16_VALUE(0.50), SIMDE_FLOAT16_VALUE(6.55), SIMDE_FLOAT16_VALUE(-3.70), SIMDE_FLOAT16_VALUE(-1.56), + SIMDE_FLOAT16_VALUE(-1.16), SIMDE_FLOAT16_VALUE(1.56), SIMDE_FLOAT16_VALUE(-4.94), SIMDE_FLOAT16_VALUE(-5.71) }, + { { SIMDE_FLOAT16_VALUE(6.23), SIMDE_FLOAT16_VALUE(9.84), SIMDE_FLOAT16_VALUE(1.44), SIMDE_FLOAT16_VALUE(-9.15) }, + { SIMDE_FLOAT16_VALUE(0.50), SIMDE_FLOAT16_VALUE(6.55), SIMDE_FLOAT16_VALUE(-3.70), SIMDE_FLOAT16_VALUE(-1.56) }, + { SIMDE_FLOAT16_VALUE(-1.16), SIMDE_FLOAT16_VALUE(1.56), SIMDE_FLOAT16_VALUE(-4.94), SIMDE_FLOAT16_VALUE(-5.71) } } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x3_t r = simde_vld1_f16_x3(test_vec[i].buf); + simde_float16x4x3_t expected = {{ + simde_vld1_f16(test_vec[i].expected[0]), + simde_vld1_f16(test_vec[i].expected[1]), + simde_vld1_f16(test_vec[i].expected[2]), + }}; + simde_test_arm_neon_assert_equal_f16x4x3(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1_f32_x3 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -800,6 +851,7 @@ test_simde_vld1_u64_x3 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f16_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f32_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f64_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_s8_x3) diff --git a/test/arm/neon/ld1_x4.c b/test/arm/neon/ld1_x4.c index ed91a87ef..38cb63611 100644 --- a/test/arm/neon/ld1_x4.c +++ b/test/arm/neon/ld1_x4.c @@ -6,6 +6,68 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1_f16_x4 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 buf[16]; + 
simde_float16 expected[4][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(1.49), SIMDE_FLOAT16_VALUE(-9.73), SIMDE_FLOAT16_VALUE(7.36), SIMDE_FLOAT16_VALUE(3.11), + SIMDE_FLOAT16_VALUE(7.22), SIMDE_FLOAT16_VALUE(-2.45), SIMDE_FLOAT16_VALUE(3.68), SIMDE_FLOAT16_VALUE(-1.35), + SIMDE_FLOAT16_VALUE(-6.30), SIMDE_FLOAT16_VALUE(-2.80), SIMDE_FLOAT16_VALUE(3.23), SIMDE_FLOAT16_VALUE(5.48), + SIMDE_FLOAT16_VALUE(4.45), SIMDE_FLOAT16_VALUE(-3.47), SIMDE_FLOAT16_VALUE(7.65), SIMDE_FLOAT16_VALUE(3.68) }, + { { SIMDE_FLOAT16_VALUE(1.49), SIMDE_FLOAT16_VALUE(-9.73), SIMDE_FLOAT16_VALUE(7.36), SIMDE_FLOAT16_VALUE(3.11) }, + { SIMDE_FLOAT16_VALUE(7.22), SIMDE_FLOAT16_VALUE(-2.45), SIMDE_FLOAT16_VALUE(3.68), SIMDE_FLOAT16_VALUE(-1.35) }, + { SIMDE_FLOAT16_VALUE(-6.30), SIMDE_FLOAT16_VALUE(-2.80), SIMDE_FLOAT16_VALUE(3.23), SIMDE_FLOAT16_VALUE(5.48) }, + { SIMDE_FLOAT16_VALUE(4.45), SIMDE_FLOAT16_VALUE(-3.47), SIMDE_FLOAT16_VALUE(7.65), SIMDE_FLOAT16_VALUE(3.68) } } }, + { { SIMDE_FLOAT16_VALUE(-8.94), SIMDE_FLOAT16_VALUE(-1.95), SIMDE_FLOAT16_VALUE(7.40), SIMDE_FLOAT16_VALUE(4.23), + SIMDE_FLOAT16_VALUE(1.06), SIMDE_FLOAT16_VALUE(-0.17), SIMDE_FLOAT16_VALUE(-7.00), SIMDE_FLOAT16_VALUE(9.69), + SIMDE_FLOAT16_VALUE(-1.56), SIMDE_FLOAT16_VALUE(2.45), SIMDE_FLOAT16_VALUE(-4.16), SIMDE_FLOAT16_VALUE(-6.16), + SIMDE_FLOAT16_VALUE(6.09), SIMDE_FLOAT16_VALUE(3.06), SIMDE_FLOAT16_VALUE(-0.57), SIMDE_FLOAT16_VALUE(4.51) }, + { { SIMDE_FLOAT16_VALUE(-8.94), SIMDE_FLOAT16_VALUE(-1.95), SIMDE_FLOAT16_VALUE(7.40), SIMDE_FLOAT16_VALUE(4.23) }, + { SIMDE_FLOAT16_VALUE(1.06), SIMDE_FLOAT16_VALUE(-0.17), SIMDE_FLOAT16_VALUE(-7.00), SIMDE_FLOAT16_VALUE(9.69) }, + { SIMDE_FLOAT16_VALUE(-1.56), SIMDE_FLOAT16_VALUE(2.45), SIMDE_FLOAT16_VALUE(-4.16), SIMDE_FLOAT16_VALUE(-6.16) }, + { SIMDE_FLOAT16_VALUE(6.09), SIMDE_FLOAT16_VALUE(3.06), SIMDE_FLOAT16_VALUE(-0.57), SIMDE_FLOAT16_VALUE(4.51) } } }, + { { SIMDE_FLOAT16_VALUE(6.06), SIMDE_FLOAT16_VALUE(-0.83), SIMDE_FLOAT16_VALUE(-8.07), 
SIMDE_FLOAT16_VALUE(-8.13), + SIMDE_FLOAT16_VALUE(-2.78), SIMDE_FLOAT16_VALUE(-9.03), SIMDE_FLOAT16_VALUE(0.63), SIMDE_FLOAT16_VALUE(-6.69), + SIMDE_FLOAT16_VALUE(3.21), SIMDE_FLOAT16_VALUE(-7.11), SIMDE_FLOAT16_VALUE(2.09), SIMDE_FLOAT16_VALUE(5.06), + SIMDE_FLOAT16_VALUE(-0.76), SIMDE_FLOAT16_VALUE(4.49), SIMDE_FLOAT16_VALUE(-8.49), SIMDE_FLOAT16_VALUE(-9.25) }, + { { SIMDE_FLOAT16_VALUE(6.06), SIMDE_FLOAT16_VALUE(-0.83), SIMDE_FLOAT16_VALUE(-8.07), SIMDE_FLOAT16_VALUE(-8.13) }, + { SIMDE_FLOAT16_VALUE(-2.78), SIMDE_FLOAT16_VALUE(-9.03), SIMDE_FLOAT16_VALUE(0.63), SIMDE_FLOAT16_VALUE(-6.69) }, + { SIMDE_FLOAT16_VALUE(3.21), SIMDE_FLOAT16_VALUE(-7.11), SIMDE_FLOAT16_VALUE(2.09), SIMDE_FLOAT16_VALUE(5.06) }, + { SIMDE_FLOAT16_VALUE(-0.76), SIMDE_FLOAT16_VALUE(4.49), SIMDE_FLOAT16_VALUE(-8.49), SIMDE_FLOAT16_VALUE(-9.25) } } }, + { { SIMDE_FLOAT16_VALUE(9.79), SIMDE_FLOAT16_VALUE(2.62), SIMDE_FLOAT16_VALUE(-9.19), SIMDE_FLOAT16_VALUE(3.73), + SIMDE_FLOAT16_VALUE(1.03), SIMDE_FLOAT16_VALUE(8.35), SIMDE_FLOAT16_VALUE(-3.15), SIMDE_FLOAT16_VALUE(0.58), + SIMDE_FLOAT16_VALUE(-3.14), SIMDE_FLOAT16_VALUE(-8.78), SIMDE_FLOAT16_VALUE(7.54), SIMDE_FLOAT16_VALUE(-1.07), + SIMDE_FLOAT16_VALUE(-8.37), SIMDE_FLOAT16_VALUE(7.13), SIMDE_FLOAT16_VALUE(-5.82), SIMDE_FLOAT16_VALUE(8.91) }, + { { SIMDE_FLOAT16_VALUE(9.79), SIMDE_FLOAT16_VALUE(2.62), SIMDE_FLOAT16_VALUE(-9.19), SIMDE_FLOAT16_VALUE(3.73) }, + { SIMDE_FLOAT16_VALUE(1.03), SIMDE_FLOAT16_VALUE(8.35), SIMDE_FLOAT16_VALUE(-3.15), SIMDE_FLOAT16_VALUE(0.58) }, + { SIMDE_FLOAT16_VALUE(-3.14), SIMDE_FLOAT16_VALUE(-8.78), SIMDE_FLOAT16_VALUE(7.54), SIMDE_FLOAT16_VALUE(-1.07) }, + { SIMDE_FLOAT16_VALUE(-8.37), SIMDE_FLOAT16_VALUE(7.13), SIMDE_FLOAT16_VALUE(-5.82), SIMDE_FLOAT16_VALUE(8.91) } } }, + { { SIMDE_FLOAT16_VALUE(-9.77), SIMDE_FLOAT16_VALUE(6.23), SIMDE_FLOAT16_VALUE(3.40), SIMDE_FLOAT16_VALUE(-7.45), + SIMDE_FLOAT16_VALUE(7.00), SIMDE_FLOAT16_VALUE(-0.92), SIMDE_FLOAT16_VALUE(3.60), SIMDE_FLOAT16_VALUE(3.19), + 
SIMDE_FLOAT16_VALUE(3.24), SIMDE_FLOAT16_VALUE(8.21), SIMDE_FLOAT16_VALUE(-1.51), SIMDE_FLOAT16_VALUE(4.81), + SIMDE_FLOAT16_VALUE(2.05), SIMDE_FLOAT16_VALUE(-3.18), SIMDE_FLOAT16_VALUE(8.26), SIMDE_FLOAT16_VALUE(-6.38) }, + { { SIMDE_FLOAT16_VALUE(-9.77), SIMDE_FLOAT16_VALUE(6.23), SIMDE_FLOAT16_VALUE(3.40), SIMDE_FLOAT16_VALUE(-7.45) }, + { SIMDE_FLOAT16_VALUE(7.00), SIMDE_FLOAT16_VALUE(-0.92), SIMDE_FLOAT16_VALUE(3.60), SIMDE_FLOAT16_VALUE(3.19) }, + { SIMDE_FLOAT16_VALUE(3.24), SIMDE_FLOAT16_VALUE(8.21), SIMDE_FLOAT16_VALUE(-1.51), SIMDE_FLOAT16_VALUE(4.81) }, + { SIMDE_FLOAT16_VALUE(2.05), SIMDE_FLOAT16_VALUE(-3.18), SIMDE_FLOAT16_VALUE(8.26), SIMDE_FLOAT16_VALUE(-6.38) } } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x4_t r = simde_vld1_f16_x4(test_vec[i].buf); + simde_float16x4x4_t expected = {{ + simde_vld1_f16(test_vec[i].expected[0]), + simde_vld1_f16(test_vec[i].expected[1]), + simde_vld1_f16(test_vec[i].expected[2]), + simde_vld1_f16(test_vec[i].expected[3]), + }}; + simde_test_arm_neon_assert_equal_f16x4x4(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1_f32_x4 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -906,6 +968,7 @@ test_simde_vld1_u64_x4 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f16_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f32_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_f64_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vld1_s8_x4) diff --git a/test/arm/neon/ld1q_x2.c b/test/arm/neon/ld1q_x2.c index 743871cb4..9d2bb155c 100644 --- a/test/arm/neon/ld1q_x2.c +++ b/test/arm/neon/ld1q_x2.c @@ -6,6 +6,66 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1q_f16_x2 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 buf[16]; + simde_float16 expected[2][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(8.77), SIMDE_FLOAT16_VALUE(-8.18), SIMDE_FLOAT16_VALUE(0.29), SIMDE_FLOAT16_VALUE(-5.13), + SIMDE_FLOAT16_VALUE(-0.49), 
SIMDE_FLOAT16_VALUE(-5.46), SIMDE_FLOAT16_VALUE(-5.01), SIMDE_FLOAT16_VALUE(9.21), + SIMDE_FLOAT16_VALUE(-5.50), SIMDE_FLOAT16_VALUE(0.96), SIMDE_FLOAT16_VALUE(-8.88), SIMDE_FLOAT16_VALUE(-3.56), + SIMDE_FLOAT16_VALUE(-7.57), SIMDE_FLOAT16_VALUE(4.59), SIMDE_FLOAT16_VALUE(2.11), SIMDE_FLOAT16_VALUE(-8.34) }, + { { SIMDE_FLOAT16_VALUE(8.77), SIMDE_FLOAT16_VALUE(-8.18), SIMDE_FLOAT16_VALUE(0.29), SIMDE_FLOAT16_VALUE(-5.13), + SIMDE_FLOAT16_VALUE(-0.49), SIMDE_FLOAT16_VALUE(-5.46), SIMDE_FLOAT16_VALUE(-5.01), SIMDE_FLOAT16_VALUE(9.21) }, + { SIMDE_FLOAT16_VALUE(-5.50), SIMDE_FLOAT16_VALUE(0.96), SIMDE_FLOAT16_VALUE(-8.88), SIMDE_FLOAT16_VALUE(-3.56), + SIMDE_FLOAT16_VALUE(-7.57), SIMDE_FLOAT16_VALUE(4.59), SIMDE_FLOAT16_VALUE(2.11), SIMDE_FLOAT16_VALUE(-8.34) } } }, + { { SIMDE_FLOAT16_VALUE(-2.41), SIMDE_FLOAT16_VALUE(-4.05), SIMDE_FLOAT16_VALUE(-2.12), SIMDE_FLOAT16_VALUE(8.76), + SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.20), SIMDE_FLOAT16_VALUE(-9.36), SIMDE_FLOAT16_VALUE(7.44), + SIMDE_FLOAT16_VALUE(-0.56), SIMDE_FLOAT16_VALUE(-0.19), SIMDE_FLOAT16_VALUE(2.69), SIMDE_FLOAT16_VALUE(-3.86), + SIMDE_FLOAT16_VALUE(8.27), SIMDE_FLOAT16_VALUE(7.78), SIMDE_FLOAT16_VALUE(5.32), SIMDE_FLOAT16_VALUE(8.78) }, + { { SIMDE_FLOAT16_VALUE(-2.41), SIMDE_FLOAT16_VALUE(-4.05), SIMDE_FLOAT16_VALUE(-2.12), SIMDE_FLOAT16_VALUE(8.76), + SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.20), SIMDE_FLOAT16_VALUE(-9.36), SIMDE_FLOAT16_VALUE(7.44) }, + { SIMDE_FLOAT16_VALUE(-0.56), SIMDE_FLOAT16_VALUE(-0.19), SIMDE_FLOAT16_VALUE(2.69), SIMDE_FLOAT16_VALUE(-3.86), + SIMDE_FLOAT16_VALUE(8.27), SIMDE_FLOAT16_VALUE(7.78), SIMDE_FLOAT16_VALUE(5.32), SIMDE_FLOAT16_VALUE(8.78) } } }, + { { SIMDE_FLOAT16_VALUE(9.59), SIMDE_FLOAT16_VALUE(-4.45), SIMDE_FLOAT16_VALUE(-4.44), SIMDE_FLOAT16_VALUE(-3.80), + SIMDE_FLOAT16_VALUE(8.02), SIMDE_FLOAT16_VALUE(-6.86), SIMDE_FLOAT16_VALUE(3.22), SIMDE_FLOAT16_VALUE(7.96), + SIMDE_FLOAT16_VALUE(2.96), SIMDE_FLOAT16_VALUE(9.81), 
SIMDE_FLOAT16_VALUE(-2.28), SIMDE_FLOAT16_VALUE(2.25), + SIMDE_FLOAT16_VALUE(6.08), SIMDE_FLOAT16_VALUE(-2.65), SIMDE_FLOAT16_VALUE(5.79), SIMDE_FLOAT16_VALUE(2.07) }, + { { SIMDE_FLOAT16_VALUE(9.59), SIMDE_FLOAT16_VALUE(-4.45), SIMDE_FLOAT16_VALUE(-4.44), SIMDE_FLOAT16_VALUE(-3.80), + SIMDE_FLOAT16_VALUE(8.02), SIMDE_FLOAT16_VALUE(-6.86), SIMDE_FLOAT16_VALUE(3.22), SIMDE_FLOAT16_VALUE(7.96) }, + { SIMDE_FLOAT16_VALUE(2.96), SIMDE_FLOAT16_VALUE(9.81), SIMDE_FLOAT16_VALUE(-2.28), SIMDE_FLOAT16_VALUE(2.25), + SIMDE_FLOAT16_VALUE(6.08), SIMDE_FLOAT16_VALUE(-2.65), SIMDE_FLOAT16_VALUE(5.79), SIMDE_FLOAT16_VALUE(2.07) } } }, + { { SIMDE_FLOAT16_VALUE(-8.12), SIMDE_FLOAT16_VALUE(-8.56), SIMDE_FLOAT16_VALUE(-3.17), SIMDE_FLOAT16_VALUE(7.97), + SIMDE_FLOAT16_VALUE(-6.75), SIMDE_FLOAT16_VALUE(-7.91), SIMDE_FLOAT16_VALUE(-3.52), SIMDE_FLOAT16_VALUE(4.08), + SIMDE_FLOAT16_VALUE(-7.33), SIMDE_FLOAT16_VALUE(4.96), SIMDE_FLOAT16_VALUE(2.30), SIMDE_FLOAT16_VALUE(4.35), + SIMDE_FLOAT16_VALUE(6.65), SIMDE_FLOAT16_VALUE(-9.83), SIMDE_FLOAT16_VALUE(4.16), SIMDE_FLOAT16_VALUE(-9.32) }, + { { SIMDE_FLOAT16_VALUE(-8.12), SIMDE_FLOAT16_VALUE(-8.56), SIMDE_FLOAT16_VALUE(-3.17), SIMDE_FLOAT16_VALUE(7.97), + SIMDE_FLOAT16_VALUE(-6.75), SIMDE_FLOAT16_VALUE(-7.91), SIMDE_FLOAT16_VALUE(-3.52), SIMDE_FLOAT16_VALUE(4.08) }, + { SIMDE_FLOAT16_VALUE(-7.33), SIMDE_FLOAT16_VALUE(4.96), SIMDE_FLOAT16_VALUE(2.30), SIMDE_FLOAT16_VALUE(4.35), + SIMDE_FLOAT16_VALUE(6.65), SIMDE_FLOAT16_VALUE(-9.83), SIMDE_FLOAT16_VALUE(4.16), SIMDE_FLOAT16_VALUE(-9.32) } } }, + { { SIMDE_FLOAT16_VALUE(-8.89), SIMDE_FLOAT16_VALUE(2.64), SIMDE_FLOAT16_VALUE(8.88), SIMDE_FLOAT16_VALUE(8.92), + SIMDE_FLOAT16_VALUE(1.05), SIMDE_FLOAT16_VALUE(-5.69), SIMDE_FLOAT16_VALUE(1.66), SIMDE_FLOAT16_VALUE(-3.57), + SIMDE_FLOAT16_VALUE(-0.96), SIMDE_FLOAT16_VALUE(2.24), SIMDE_FLOAT16_VALUE(6.98), SIMDE_FLOAT16_VALUE(1.03), + SIMDE_FLOAT16_VALUE(7.90), SIMDE_FLOAT16_VALUE(9.00), SIMDE_FLOAT16_VALUE(-0.34), SIMDE_FLOAT16_VALUE(9.82) }, + 
{ { SIMDE_FLOAT16_VALUE(-8.89), SIMDE_FLOAT16_VALUE(2.64), SIMDE_FLOAT16_VALUE(8.88), SIMDE_FLOAT16_VALUE(8.92), + SIMDE_FLOAT16_VALUE(1.05), SIMDE_FLOAT16_VALUE(-5.69), SIMDE_FLOAT16_VALUE(1.66), SIMDE_FLOAT16_VALUE(-3.57) }, + { SIMDE_FLOAT16_VALUE(-0.96), SIMDE_FLOAT16_VALUE(2.24), SIMDE_FLOAT16_VALUE(6.98), SIMDE_FLOAT16_VALUE(1.03), + SIMDE_FLOAT16_VALUE(7.90), SIMDE_FLOAT16_VALUE(9.00), SIMDE_FLOAT16_VALUE(-0.34), SIMDE_FLOAT16_VALUE(9.82) } } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x2_t r = simde_vld1q_f16_x2(test_vec[i].buf); + simde_float16x8x2_t expected = {{ + simde_vld1q_f16(test_vec[i].expected[0]), + simde_vld1q_f16(test_vec[i].expected[1]), + }}; + simde_test_arm_neon_assert_equal_f16x8x2(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1q_f32_x2 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -758,6 +818,7 @@ test_simde_vld1q_u64_x2 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f16_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f32_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f64_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_s8_x2) diff --git a/test/arm/neon/ld1q_x3.c b/test/arm/neon/ld1q_x3.c index 22200ba06..12d5c1ce7 100644 --- a/test/arm/neon/ld1q_x3.c +++ b/test/arm/neon/ld1q_x3.c @@ -6,6 +6,87 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1q_f16_x3 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 buf[24]; + simde_float16 expected[3][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(5.77), SIMDE_FLOAT16_VALUE(3.40), SIMDE_FLOAT16_VALUE(8.52), SIMDE_FLOAT16_VALUE(-8.12), + SIMDE_FLOAT16_VALUE(9.75), SIMDE_FLOAT16_VALUE(4.58), SIMDE_FLOAT16_VALUE(-8.12), SIMDE_FLOAT16_VALUE(-0.21), + SIMDE_FLOAT16_VALUE(-8.19), SIMDE_FLOAT16_VALUE(9.31), SIMDE_FLOAT16_VALUE(-3.36), SIMDE_FLOAT16_VALUE(6.68), + SIMDE_FLOAT16_VALUE(-4.82), SIMDE_FLOAT16_VALUE(-7.99), SIMDE_FLOAT16_VALUE(7.60), SIMDE_FLOAT16_VALUE(-5.32), + 
SIMDE_FLOAT16_VALUE(8.17), SIMDE_FLOAT16_VALUE(-2.82), SIMDE_FLOAT16_VALUE(-0.51), SIMDE_FLOAT16_VALUE(-8.15), + SIMDE_FLOAT16_VALUE(-5.44), SIMDE_FLOAT16_VALUE(8.44), SIMDE_FLOAT16_VALUE(-6.93), SIMDE_FLOAT16_VALUE(2.81) }, + { { SIMDE_FLOAT16_VALUE(5.77), SIMDE_FLOAT16_VALUE(3.40), SIMDE_FLOAT16_VALUE(8.52), SIMDE_FLOAT16_VALUE(-8.12), + SIMDE_FLOAT16_VALUE(9.75), SIMDE_FLOAT16_VALUE(4.58), SIMDE_FLOAT16_VALUE(-8.12), SIMDE_FLOAT16_VALUE(-0.21) }, + { SIMDE_FLOAT16_VALUE(-8.19), SIMDE_FLOAT16_VALUE(9.31), SIMDE_FLOAT16_VALUE(-3.36), SIMDE_FLOAT16_VALUE(6.68), + SIMDE_FLOAT16_VALUE(-4.82), SIMDE_FLOAT16_VALUE(-7.99), SIMDE_FLOAT16_VALUE(7.60), SIMDE_FLOAT16_VALUE(-5.32) }, + { SIMDE_FLOAT16_VALUE(8.17), SIMDE_FLOAT16_VALUE(-2.82), SIMDE_FLOAT16_VALUE(-0.51), SIMDE_FLOAT16_VALUE(-8.15), + SIMDE_FLOAT16_VALUE(-5.44), SIMDE_FLOAT16_VALUE(8.44), SIMDE_FLOAT16_VALUE(-6.93), SIMDE_FLOAT16_VALUE(2.81) } } }, + { { SIMDE_FLOAT16_VALUE(1.57), SIMDE_FLOAT16_VALUE(-2.73), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(9.59), + SIMDE_FLOAT16_VALUE(5.25), SIMDE_FLOAT16_VALUE(-4.36), SIMDE_FLOAT16_VALUE(0.01), SIMDE_FLOAT16_VALUE(-7.95), + SIMDE_FLOAT16_VALUE(-7.48), SIMDE_FLOAT16_VALUE(5.11), SIMDE_FLOAT16_VALUE(2.89), SIMDE_FLOAT16_VALUE(4.88), + SIMDE_FLOAT16_VALUE(0.38), SIMDE_FLOAT16_VALUE(5.53), SIMDE_FLOAT16_VALUE(-1.66), SIMDE_FLOAT16_VALUE(4.24), + SIMDE_FLOAT16_VALUE(5.92), SIMDE_FLOAT16_VALUE(0.65), SIMDE_FLOAT16_VALUE(-9.00), SIMDE_FLOAT16_VALUE(9.26), + SIMDE_FLOAT16_VALUE(-0.65), SIMDE_FLOAT16_VALUE(3.52), SIMDE_FLOAT16_VALUE(8.37), SIMDE_FLOAT16_VALUE(-6.65) }, + { { SIMDE_FLOAT16_VALUE(1.57), SIMDE_FLOAT16_VALUE(-2.73), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(9.59), + SIMDE_FLOAT16_VALUE(5.25), SIMDE_FLOAT16_VALUE(-4.36), SIMDE_FLOAT16_VALUE(0.01), SIMDE_FLOAT16_VALUE(-7.95) }, + { SIMDE_FLOAT16_VALUE(-7.48), SIMDE_FLOAT16_VALUE(5.11), SIMDE_FLOAT16_VALUE(2.89), SIMDE_FLOAT16_VALUE(4.88), + SIMDE_FLOAT16_VALUE(0.38), SIMDE_FLOAT16_VALUE(5.53), 
SIMDE_FLOAT16_VALUE(-1.66), SIMDE_FLOAT16_VALUE(4.24) }, + { SIMDE_FLOAT16_VALUE(5.92), SIMDE_FLOAT16_VALUE(0.65), SIMDE_FLOAT16_VALUE(-9.00), SIMDE_FLOAT16_VALUE(9.26), + SIMDE_FLOAT16_VALUE(-0.65), SIMDE_FLOAT16_VALUE(3.52), SIMDE_FLOAT16_VALUE(8.37), SIMDE_FLOAT16_VALUE(-6.65) } } }, + { { SIMDE_FLOAT16_VALUE(-1.37), SIMDE_FLOAT16_VALUE(-2.25), SIMDE_FLOAT16_VALUE(1.38), SIMDE_FLOAT16_VALUE(-0.69), + SIMDE_FLOAT16_VALUE(-0.74), SIMDE_FLOAT16_VALUE(-5.25), SIMDE_FLOAT16_VALUE(6.54), SIMDE_FLOAT16_VALUE(-9.57), + SIMDE_FLOAT16_VALUE(8.41), SIMDE_FLOAT16_VALUE(0.56), SIMDE_FLOAT16_VALUE(-3.33), SIMDE_FLOAT16_VALUE(0.61), + SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(9.80), SIMDE_FLOAT16_VALUE(7.20), SIMDE_FLOAT16_VALUE(-1.70), + SIMDE_FLOAT16_VALUE(6.31), SIMDE_FLOAT16_VALUE(-7.33), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(5.75), + SIMDE_FLOAT16_VALUE(4.89), SIMDE_FLOAT16_VALUE(-5.85), SIMDE_FLOAT16_VALUE(-5.80), SIMDE_FLOAT16_VALUE(1.30) }, + { { SIMDE_FLOAT16_VALUE(-1.37), SIMDE_FLOAT16_VALUE(-2.25), SIMDE_FLOAT16_VALUE(1.38), SIMDE_FLOAT16_VALUE(-0.69), + SIMDE_FLOAT16_VALUE(-0.74), SIMDE_FLOAT16_VALUE(-5.25), SIMDE_FLOAT16_VALUE(6.54), SIMDE_FLOAT16_VALUE(-9.57) }, + { SIMDE_FLOAT16_VALUE(8.41), SIMDE_FLOAT16_VALUE(0.56), SIMDE_FLOAT16_VALUE(-3.33), SIMDE_FLOAT16_VALUE(0.61), + SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(9.80), SIMDE_FLOAT16_VALUE(7.20), SIMDE_FLOAT16_VALUE(-1.70) }, + { SIMDE_FLOAT16_VALUE(6.31), SIMDE_FLOAT16_VALUE(-7.33), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(5.75), + SIMDE_FLOAT16_VALUE(4.89), SIMDE_FLOAT16_VALUE(-5.85), SIMDE_FLOAT16_VALUE(-5.80), SIMDE_FLOAT16_VALUE(1.30) } } }, + { { SIMDE_FLOAT16_VALUE(-1.53), SIMDE_FLOAT16_VALUE(1.62), SIMDE_FLOAT16_VALUE(-5.29), SIMDE_FLOAT16_VALUE(-4.50), + SIMDE_FLOAT16_VALUE(-9.77), SIMDE_FLOAT16_VALUE(1.79), SIMDE_FLOAT16_VALUE(4.79), SIMDE_FLOAT16_VALUE(6.77), + SIMDE_FLOAT16_VALUE(2.13), SIMDE_FLOAT16_VALUE(-4.49), SIMDE_FLOAT16_VALUE(-8.50), SIMDE_FLOAT16_VALUE(-4.17), + 
SIMDE_FLOAT16_VALUE(-7.82), SIMDE_FLOAT16_VALUE(8.62), SIMDE_FLOAT16_VALUE(-9.23), SIMDE_FLOAT16_VALUE(-8.30), + SIMDE_FLOAT16_VALUE(6.89), SIMDE_FLOAT16_VALUE(8.83), SIMDE_FLOAT16_VALUE(-8.78), SIMDE_FLOAT16_VALUE(-2.63), + SIMDE_FLOAT16_VALUE(-4.15), SIMDE_FLOAT16_VALUE(4.43), SIMDE_FLOAT16_VALUE(-1.23), SIMDE_FLOAT16_VALUE(-7.80) }, + { { SIMDE_FLOAT16_VALUE(-1.53), SIMDE_FLOAT16_VALUE(1.62), SIMDE_FLOAT16_VALUE(-5.29), SIMDE_FLOAT16_VALUE(-4.50), + SIMDE_FLOAT16_VALUE(-9.77), SIMDE_FLOAT16_VALUE(1.79), SIMDE_FLOAT16_VALUE(4.79), SIMDE_FLOAT16_VALUE(6.77) }, + { SIMDE_FLOAT16_VALUE(2.13), SIMDE_FLOAT16_VALUE(-4.49), SIMDE_FLOAT16_VALUE(-8.50), SIMDE_FLOAT16_VALUE(-4.17), + SIMDE_FLOAT16_VALUE(-7.82), SIMDE_FLOAT16_VALUE(8.62), SIMDE_FLOAT16_VALUE(-9.23), SIMDE_FLOAT16_VALUE(-8.30) }, + { SIMDE_FLOAT16_VALUE(6.89), SIMDE_FLOAT16_VALUE(8.83), SIMDE_FLOAT16_VALUE(-8.78), SIMDE_FLOAT16_VALUE(-2.63), + SIMDE_FLOAT16_VALUE(-4.15), SIMDE_FLOAT16_VALUE(4.43), SIMDE_FLOAT16_VALUE(-1.23), SIMDE_FLOAT16_VALUE(-7.80) } } }, + { { SIMDE_FLOAT16_VALUE(7.81), SIMDE_FLOAT16_VALUE(-8.12), SIMDE_FLOAT16_VALUE(-5.24), SIMDE_FLOAT16_VALUE(-7.34), + SIMDE_FLOAT16_VALUE(-5.07), SIMDE_FLOAT16_VALUE(8.42), SIMDE_FLOAT16_VALUE(-1.56), SIMDE_FLOAT16_VALUE(3.85), + SIMDE_FLOAT16_VALUE(1.08), SIMDE_FLOAT16_VALUE(-3.58), SIMDE_FLOAT16_VALUE(-1.71), SIMDE_FLOAT16_VALUE(-6.62), + SIMDE_FLOAT16_VALUE(3.82), SIMDE_FLOAT16_VALUE(-0.05), SIMDE_FLOAT16_VALUE(5.35), SIMDE_FLOAT16_VALUE(-4.41), + SIMDE_FLOAT16_VALUE(7.30), SIMDE_FLOAT16_VALUE(-7.74), SIMDE_FLOAT16_VALUE(-9.77), SIMDE_FLOAT16_VALUE(-9.07), + SIMDE_FLOAT16_VALUE(9.48), SIMDE_FLOAT16_VALUE(-2.73), SIMDE_FLOAT16_VALUE(-7.56), SIMDE_FLOAT16_VALUE(3.51) }, + { { SIMDE_FLOAT16_VALUE(7.81), SIMDE_FLOAT16_VALUE(-8.12), SIMDE_FLOAT16_VALUE(-5.24), SIMDE_FLOAT16_VALUE(-7.34), + SIMDE_FLOAT16_VALUE(-5.07), SIMDE_FLOAT16_VALUE(8.42), SIMDE_FLOAT16_VALUE(-1.56), SIMDE_FLOAT16_VALUE(3.85) }, + { SIMDE_FLOAT16_VALUE(1.08), 
SIMDE_FLOAT16_VALUE(-3.58), SIMDE_FLOAT16_VALUE(-1.71), SIMDE_FLOAT16_VALUE(-6.62), + SIMDE_FLOAT16_VALUE(3.82), SIMDE_FLOAT16_VALUE(-0.05), SIMDE_FLOAT16_VALUE(5.35), SIMDE_FLOAT16_VALUE(-4.41) }, + { SIMDE_FLOAT16_VALUE(7.30), SIMDE_FLOAT16_VALUE(-7.74), SIMDE_FLOAT16_VALUE(-9.77), SIMDE_FLOAT16_VALUE(-9.07), + SIMDE_FLOAT16_VALUE(9.48), SIMDE_FLOAT16_VALUE(-2.73), SIMDE_FLOAT16_VALUE(-7.56), SIMDE_FLOAT16_VALUE(3.51) } } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x3_t r = simde_vld1q_f16_x3(test_vec[i].buf); + simde_float16x8x3_t expected = {{ + simde_vld1q_f16(test_vec[i].expected[0]), + simde_vld1q_f16(test_vec[i].expected[1]), + simde_vld1q_f16(test_vec[i].expected[2]), + }}; + simde_test_arm_neon_assert_equal_f16x8x3(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1q_f32_x3 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -960,6 +1041,7 @@ test_simde_vld1q_u64_x3 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f16_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f32_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f64_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_s8_x3) diff --git a/test/arm/neon/ld1q_x4.c b/test/arm/neon/ld1q_x4.c index 2f2827060..15becdbf1 100644 --- a/test/arm/neon/ld1q_x4.c +++ b/test/arm/neon/ld1q_x4.c @@ -6,6 +6,108 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vld1q_f16_x4 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 buf[32]; + simde_float16 expected[4][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(0.77), SIMDE_FLOAT16_VALUE(-4.96), SIMDE_FLOAT16_VALUE(4.32), SIMDE_FLOAT16_VALUE(7.52), + SIMDE_FLOAT16_VALUE(4.70), SIMDE_FLOAT16_VALUE(1.33), SIMDE_FLOAT16_VALUE(-9.47), SIMDE_FLOAT16_VALUE(7.32), + SIMDE_FLOAT16_VALUE(-3.23), SIMDE_FLOAT16_VALUE(0.61), SIMDE_FLOAT16_VALUE(-7.77), SIMDE_FLOAT16_VALUE(1.39), + SIMDE_FLOAT16_VALUE(-3.07), SIMDE_FLOAT16_VALUE(-3.61), SIMDE_FLOAT16_VALUE(1.98), 
SIMDE_FLOAT16_VALUE(-7.55), + SIMDE_FLOAT16_VALUE(-4.89), SIMDE_FLOAT16_VALUE(2.58), SIMDE_FLOAT16_VALUE(-1.82), SIMDE_FLOAT16_VALUE(-7.17), + SIMDE_FLOAT16_VALUE(8.34), SIMDE_FLOAT16_VALUE(6.72), SIMDE_FLOAT16_VALUE(-7.10), SIMDE_FLOAT16_VALUE(-4.64), + SIMDE_FLOAT16_VALUE(9.47), SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(0.95), SIMDE_FLOAT16_VALUE(-6.12), + SIMDE_FLOAT16_VALUE(-4.02), SIMDE_FLOAT16_VALUE(-8.97), SIMDE_FLOAT16_VALUE(-5.84), SIMDE_FLOAT16_VALUE(5.81) }, + { { SIMDE_FLOAT16_VALUE(0.77), SIMDE_FLOAT16_VALUE(-4.96), SIMDE_FLOAT16_VALUE(4.32), SIMDE_FLOAT16_VALUE(7.52), + SIMDE_FLOAT16_VALUE(4.70), SIMDE_FLOAT16_VALUE(1.33), SIMDE_FLOAT16_VALUE(-9.47), SIMDE_FLOAT16_VALUE(7.32) }, + { SIMDE_FLOAT16_VALUE(-3.23), SIMDE_FLOAT16_VALUE(0.61), SIMDE_FLOAT16_VALUE(-7.77), SIMDE_FLOAT16_VALUE(1.39), + SIMDE_FLOAT16_VALUE(-3.07), SIMDE_FLOAT16_VALUE(-3.61), SIMDE_FLOAT16_VALUE(1.98), SIMDE_FLOAT16_VALUE(-7.55) }, + { SIMDE_FLOAT16_VALUE(-4.89), SIMDE_FLOAT16_VALUE(2.58), SIMDE_FLOAT16_VALUE(-1.82), SIMDE_FLOAT16_VALUE(-7.17), + SIMDE_FLOAT16_VALUE(8.34), SIMDE_FLOAT16_VALUE(6.72), SIMDE_FLOAT16_VALUE(-7.10), SIMDE_FLOAT16_VALUE(-4.64) }, + { SIMDE_FLOAT16_VALUE(9.47), SIMDE_FLOAT16_VALUE(4.04), SIMDE_FLOAT16_VALUE(0.95), SIMDE_FLOAT16_VALUE(-6.12), + SIMDE_FLOAT16_VALUE(-4.02), SIMDE_FLOAT16_VALUE(-8.97), SIMDE_FLOAT16_VALUE(-5.84), SIMDE_FLOAT16_VALUE(5.81) } } }, + { { SIMDE_FLOAT16_VALUE(5.50), SIMDE_FLOAT16_VALUE(5.24), SIMDE_FLOAT16_VALUE(8.86), SIMDE_FLOAT16_VALUE(4.11), + SIMDE_FLOAT16_VALUE(-9.23), SIMDE_FLOAT16_VALUE(-5.26), SIMDE_FLOAT16_VALUE(2.44), SIMDE_FLOAT16_VALUE(6.43), + SIMDE_FLOAT16_VALUE(1.99), SIMDE_FLOAT16_VALUE(3.20), SIMDE_FLOAT16_VALUE(8.92), SIMDE_FLOAT16_VALUE(-1.64), + SIMDE_FLOAT16_VALUE(7.53), SIMDE_FLOAT16_VALUE(-2.88), SIMDE_FLOAT16_VALUE(1.19), SIMDE_FLOAT16_VALUE(-2.26), + SIMDE_FLOAT16_VALUE(7.85), SIMDE_FLOAT16_VALUE(-3.73), SIMDE_FLOAT16_VALUE(-6.25), SIMDE_FLOAT16_VALUE(-5.78), + SIMDE_FLOAT16_VALUE(-4.64), 
SIMDE_FLOAT16_VALUE(-6.97), SIMDE_FLOAT16_VALUE(-3.81), SIMDE_FLOAT16_VALUE(-1.47), + SIMDE_FLOAT16_VALUE(8.97), SIMDE_FLOAT16_VALUE(-3.54), SIMDE_FLOAT16_VALUE(-8.46), SIMDE_FLOAT16_VALUE(-0.59), + SIMDE_FLOAT16_VALUE(1.21), SIMDE_FLOAT16_VALUE(-0.55), SIMDE_FLOAT16_VALUE(-3.94), SIMDE_FLOAT16_VALUE(-5.49) }, + { { SIMDE_FLOAT16_VALUE(5.50), SIMDE_FLOAT16_VALUE(5.24), SIMDE_FLOAT16_VALUE(8.86), SIMDE_FLOAT16_VALUE(4.11), + SIMDE_FLOAT16_VALUE(-9.23), SIMDE_FLOAT16_VALUE(-5.26), SIMDE_FLOAT16_VALUE(2.44), SIMDE_FLOAT16_VALUE(6.43) }, + { SIMDE_FLOAT16_VALUE(1.99), SIMDE_FLOAT16_VALUE(3.20), SIMDE_FLOAT16_VALUE(8.92), SIMDE_FLOAT16_VALUE(-1.64), + SIMDE_FLOAT16_VALUE(7.53), SIMDE_FLOAT16_VALUE(-2.88), SIMDE_FLOAT16_VALUE(1.19), SIMDE_FLOAT16_VALUE(-2.26) }, + { SIMDE_FLOAT16_VALUE(7.85), SIMDE_FLOAT16_VALUE(-3.73), SIMDE_FLOAT16_VALUE(-6.25), SIMDE_FLOAT16_VALUE(-5.78), + SIMDE_FLOAT16_VALUE(-4.64), SIMDE_FLOAT16_VALUE(-6.97), SIMDE_FLOAT16_VALUE(-3.81), SIMDE_FLOAT16_VALUE(-1.47) }, + { SIMDE_FLOAT16_VALUE(8.97), SIMDE_FLOAT16_VALUE(-3.54), SIMDE_FLOAT16_VALUE(-8.46), SIMDE_FLOAT16_VALUE(-0.59), + SIMDE_FLOAT16_VALUE(1.21), SIMDE_FLOAT16_VALUE(-0.55), SIMDE_FLOAT16_VALUE(-3.94), SIMDE_FLOAT16_VALUE(-5.49) } } }, + { { SIMDE_FLOAT16_VALUE(0.84), SIMDE_FLOAT16_VALUE(1.66), SIMDE_FLOAT16_VALUE(-0.69), SIMDE_FLOAT16_VALUE(0.03), + SIMDE_FLOAT16_VALUE(-9.89), SIMDE_FLOAT16_VALUE(-1.33), SIMDE_FLOAT16_VALUE(2.77), SIMDE_FLOAT16_VALUE(2.16), + SIMDE_FLOAT16_VALUE(-9.13), SIMDE_FLOAT16_VALUE(7.90), SIMDE_FLOAT16_VALUE(9.03), SIMDE_FLOAT16_VALUE(2.79), + SIMDE_FLOAT16_VALUE(-8.16), SIMDE_FLOAT16_VALUE(-1.11), SIMDE_FLOAT16_VALUE(2.16), SIMDE_FLOAT16_VALUE(5.32), + SIMDE_FLOAT16_VALUE(9.71), SIMDE_FLOAT16_VALUE(-7.58), SIMDE_FLOAT16_VALUE(-0.85), SIMDE_FLOAT16_VALUE(-8.57), + SIMDE_FLOAT16_VALUE(1.14), SIMDE_FLOAT16_VALUE(-0.42), SIMDE_FLOAT16_VALUE(3.32), SIMDE_FLOAT16_VALUE(6.97), + SIMDE_FLOAT16_VALUE(1.24), SIMDE_FLOAT16_VALUE(-8.74), SIMDE_FLOAT16_VALUE(-8.75), 
SIMDE_FLOAT16_VALUE(-0.72), + SIMDE_FLOAT16_VALUE(5.61), SIMDE_FLOAT16_VALUE(4.15), SIMDE_FLOAT16_VALUE(9.62), SIMDE_FLOAT16_VALUE(-3.79) }, + { { SIMDE_FLOAT16_VALUE(0.84), SIMDE_FLOAT16_VALUE(1.66), SIMDE_FLOAT16_VALUE(-0.69), SIMDE_FLOAT16_VALUE(0.03), + SIMDE_FLOAT16_VALUE(-9.89), SIMDE_FLOAT16_VALUE(-1.33), SIMDE_FLOAT16_VALUE(2.77), SIMDE_FLOAT16_VALUE(2.16) }, + { SIMDE_FLOAT16_VALUE(-9.13), SIMDE_FLOAT16_VALUE(7.90), SIMDE_FLOAT16_VALUE(9.03), SIMDE_FLOAT16_VALUE(2.79), + SIMDE_FLOAT16_VALUE(-8.16), SIMDE_FLOAT16_VALUE(-1.11), SIMDE_FLOAT16_VALUE(2.16), SIMDE_FLOAT16_VALUE(5.32) }, + { SIMDE_FLOAT16_VALUE(9.71), SIMDE_FLOAT16_VALUE(-7.58), SIMDE_FLOAT16_VALUE(-0.85), SIMDE_FLOAT16_VALUE(-8.57), + SIMDE_FLOAT16_VALUE(1.14), SIMDE_FLOAT16_VALUE(-0.42), SIMDE_FLOAT16_VALUE(3.32), SIMDE_FLOAT16_VALUE(6.97) }, + { SIMDE_FLOAT16_VALUE(1.24), SIMDE_FLOAT16_VALUE(-8.74), SIMDE_FLOAT16_VALUE(-8.75), SIMDE_FLOAT16_VALUE(-0.72), + SIMDE_FLOAT16_VALUE(5.61), SIMDE_FLOAT16_VALUE(4.15), SIMDE_FLOAT16_VALUE(9.62), SIMDE_FLOAT16_VALUE(-3.79) } } }, + { { SIMDE_FLOAT16_VALUE(7.00), SIMDE_FLOAT16_VALUE(-5.79), SIMDE_FLOAT16_VALUE(-0.80), SIMDE_FLOAT16_VALUE(8.08), + SIMDE_FLOAT16_VALUE(-7.63), SIMDE_FLOAT16_VALUE(8.06), SIMDE_FLOAT16_VALUE(3.73), SIMDE_FLOAT16_VALUE(5.38), + SIMDE_FLOAT16_VALUE(-8.76), SIMDE_FLOAT16_VALUE(-3.66), SIMDE_FLOAT16_VALUE(-1.79), SIMDE_FLOAT16_VALUE(4.39), + SIMDE_FLOAT16_VALUE(4.40), SIMDE_FLOAT16_VALUE(-1.74), SIMDE_FLOAT16_VALUE(6.02), SIMDE_FLOAT16_VALUE(1.11), + SIMDE_FLOAT16_VALUE(5.43), SIMDE_FLOAT16_VALUE(-1.01), SIMDE_FLOAT16_VALUE(1.85), SIMDE_FLOAT16_VALUE(-2.65), + SIMDE_FLOAT16_VALUE(-9.84), SIMDE_FLOAT16_VALUE(-3.62), SIMDE_FLOAT16_VALUE(8.59), SIMDE_FLOAT16_VALUE(-1.78), + SIMDE_FLOAT16_VALUE(-5.54), SIMDE_FLOAT16_VALUE(2.82), SIMDE_FLOAT16_VALUE(8.26), SIMDE_FLOAT16_VALUE(9.23), + SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(1.65), SIMDE_FLOAT16_VALUE(6.48), SIMDE_FLOAT16_VALUE(6.43) }, + { { SIMDE_FLOAT16_VALUE(7.00), 
SIMDE_FLOAT16_VALUE(-5.79), SIMDE_FLOAT16_VALUE(-0.80), SIMDE_FLOAT16_VALUE(8.08), + SIMDE_FLOAT16_VALUE(-7.63), SIMDE_FLOAT16_VALUE(8.06), SIMDE_FLOAT16_VALUE(3.73), SIMDE_FLOAT16_VALUE(5.38) }, + { SIMDE_FLOAT16_VALUE(-8.76), SIMDE_FLOAT16_VALUE(-3.66), SIMDE_FLOAT16_VALUE(-1.79), SIMDE_FLOAT16_VALUE(4.39), + SIMDE_FLOAT16_VALUE(4.40), SIMDE_FLOAT16_VALUE(-1.74), SIMDE_FLOAT16_VALUE(6.02), SIMDE_FLOAT16_VALUE(1.11) }, + { SIMDE_FLOAT16_VALUE(5.43), SIMDE_FLOAT16_VALUE(-1.01), SIMDE_FLOAT16_VALUE(1.85), SIMDE_FLOAT16_VALUE(-2.65), + SIMDE_FLOAT16_VALUE(-9.84), SIMDE_FLOAT16_VALUE(-3.62), SIMDE_FLOAT16_VALUE(8.59), SIMDE_FLOAT16_VALUE(-1.78) }, + { SIMDE_FLOAT16_VALUE(-5.54), SIMDE_FLOAT16_VALUE(2.82), SIMDE_FLOAT16_VALUE(8.26), SIMDE_FLOAT16_VALUE(9.23), + SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(1.65), SIMDE_FLOAT16_VALUE(6.48), SIMDE_FLOAT16_VALUE(6.43) } } }, + { { SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(7.35), SIMDE_FLOAT16_VALUE(-8.55), SIMDE_FLOAT16_VALUE(5.24), + SIMDE_FLOAT16_VALUE(-9.39), SIMDE_FLOAT16_VALUE(-8.81), SIMDE_FLOAT16_VALUE(-2.52), SIMDE_FLOAT16_VALUE(3.42), + SIMDE_FLOAT16_VALUE(8.62), SIMDE_FLOAT16_VALUE(-3.83), SIMDE_FLOAT16_VALUE(-3.54), SIMDE_FLOAT16_VALUE(0.09), + SIMDE_FLOAT16_VALUE(5.41), SIMDE_FLOAT16_VALUE(1.42), SIMDE_FLOAT16_VALUE(-8.33), SIMDE_FLOAT16_VALUE(9.12), + SIMDE_FLOAT16_VALUE(3.57), SIMDE_FLOAT16_VALUE(-6.64), SIMDE_FLOAT16_VALUE(-9.86), SIMDE_FLOAT16_VALUE(6.74), + SIMDE_FLOAT16_VALUE(-8.50), SIMDE_FLOAT16_VALUE(-5.99), SIMDE_FLOAT16_VALUE(-5.00), SIMDE_FLOAT16_VALUE(-8.11), + SIMDE_FLOAT16_VALUE(2.43), SIMDE_FLOAT16_VALUE(8.06), SIMDE_FLOAT16_VALUE(8.39), SIMDE_FLOAT16_VALUE(-5.99), + SIMDE_FLOAT16_VALUE(-3.26), SIMDE_FLOAT16_VALUE(-5.56), SIMDE_FLOAT16_VALUE(-6.74), SIMDE_FLOAT16_VALUE(-7.39) }, + { { SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(7.35), SIMDE_FLOAT16_VALUE(-8.55), SIMDE_FLOAT16_VALUE(5.24), + SIMDE_FLOAT16_VALUE(-9.39), SIMDE_FLOAT16_VALUE(-8.81), SIMDE_FLOAT16_VALUE(-2.52), 
SIMDE_FLOAT16_VALUE(3.42) }, + { SIMDE_FLOAT16_VALUE(8.62), SIMDE_FLOAT16_VALUE(-3.83), SIMDE_FLOAT16_VALUE(-3.54), SIMDE_FLOAT16_VALUE(0.09), + SIMDE_FLOAT16_VALUE(5.41), SIMDE_FLOAT16_VALUE(1.42), SIMDE_FLOAT16_VALUE(-8.33), SIMDE_FLOAT16_VALUE(9.12) }, + { SIMDE_FLOAT16_VALUE(3.57), SIMDE_FLOAT16_VALUE(-6.64), SIMDE_FLOAT16_VALUE(-9.86), SIMDE_FLOAT16_VALUE(6.74), + SIMDE_FLOAT16_VALUE(-8.50), SIMDE_FLOAT16_VALUE(-5.99), SIMDE_FLOAT16_VALUE(-5.00), SIMDE_FLOAT16_VALUE(-8.11) }, + { SIMDE_FLOAT16_VALUE(2.43), SIMDE_FLOAT16_VALUE(8.06), SIMDE_FLOAT16_VALUE(8.39), SIMDE_FLOAT16_VALUE(-5.99), + SIMDE_FLOAT16_VALUE(-3.26), SIMDE_FLOAT16_VALUE(-5.56), SIMDE_FLOAT16_VALUE(-6.74), SIMDE_FLOAT16_VALUE(-7.39) } } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x4_t r = simde_vld1q_f16_x4(test_vec[i].buf); + simde_float16x8x4_t expected = {{ + simde_vld1q_f16(test_vec[i].expected[0]), + simde_vld1q_f16(test_vec[i].expected[1]), + simde_vld1q_f16(test_vec[i].expected[2]), + simde_vld1q_f16(test_vec[i].expected[3]), + }}; + simde_test_arm_neon_assert_equal_f16x8x4(r, expected, 1); + } + + return 0; +} + static int test_simde_vld1q_f32_x4 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1122,6 +1224,7 @@ test_simde_vld1q_u64_x4 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f16_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f32_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_f64_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vld1q_s8_x4) diff --git a/test/arm/neon/ld2.c b/test/arm/neon/ld2.c index 63412fe7d..6d5b0b15f 100644 --- a/test/arm/neon/ld2.c +++ b/test/arm/neon/ld2.c @@ -601,6 +601,59 @@ test_simde_vld2_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vld2_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[8]; + simde_float16_t r[2][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-9.278), SIMDE_FLOAT16_VALUE(-6.969), SIMDE_FLOAT16_VALUE(1.663), 
SIMDE_FLOAT16_VALUE(-2.545), + SIMDE_FLOAT16_VALUE(-2.609), SIMDE_FLOAT16_VALUE(6.118), SIMDE_FLOAT16_VALUE(2.903), SIMDE_FLOAT16_VALUE(-6.328) }, + { { SIMDE_FLOAT16_VALUE(-9.278), SIMDE_FLOAT16_VALUE(1.663), SIMDE_FLOAT16_VALUE(-2.609), SIMDE_FLOAT16_VALUE(2.903) }, + { SIMDE_FLOAT16_VALUE(-6.969), SIMDE_FLOAT16_VALUE(-2.545), SIMDE_FLOAT16_VALUE(6.118), SIMDE_FLOAT16_VALUE(-6.328) } } }, + { { SIMDE_FLOAT16_VALUE(-1.322), SIMDE_FLOAT16_VALUE(-3.593), SIMDE_FLOAT16_VALUE(-1.637), SIMDE_FLOAT16_VALUE(3.347), + SIMDE_FLOAT16_VALUE(-9.161), SIMDE_FLOAT16_VALUE(-9.575), SIMDE_FLOAT16_VALUE(7.821), SIMDE_FLOAT16_VALUE(3.600) }, + { { SIMDE_FLOAT16_VALUE(-1.322), SIMDE_FLOAT16_VALUE(-1.637), SIMDE_FLOAT16_VALUE(-9.161), SIMDE_FLOAT16_VALUE(7.821) }, + { SIMDE_FLOAT16_VALUE(-3.593), SIMDE_FLOAT16_VALUE(3.347), SIMDE_FLOAT16_VALUE(-9.575), SIMDE_FLOAT16_VALUE(3.600) } } }, + { { SIMDE_FLOAT16_VALUE(-8.482), SIMDE_FLOAT16_VALUE(-4.579), SIMDE_FLOAT16_VALUE(-0.542), SIMDE_FLOAT16_VALUE(-6.415), + SIMDE_FLOAT16_VALUE(-4.808), SIMDE_FLOAT16_VALUE(-0.746), SIMDE_FLOAT16_VALUE(6.877), SIMDE_FLOAT16_VALUE(-9.631) }, + { { SIMDE_FLOAT16_VALUE(-8.482), SIMDE_FLOAT16_VALUE(-0.542), SIMDE_FLOAT16_VALUE(-4.808), SIMDE_FLOAT16_VALUE(6.877) }, + { SIMDE_FLOAT16_VALUE(-4.579), SIMDE_FLOAT16_VALUE(-6.415), SIMDE_FLOAT16_VALUE(-0.746), SIMDE_FLOAT16_VALUE(-9.631) } } }, + { { SIMDE_FLOAT16_VALUE(-7.341), SIMDE_FLOAT16_VALUE(-3.836), SIMDE_FLOAT16_VALUE(-0.189), SIMDE_FLOAT16_VALUE(-2.384), + SIMDE_FLOAT16_VALUE(2.127), SIMDE_FLOAT16_VALUE(1.646), SIMDE_FLOAT16_VALUE(7.168), SIMDE_FLOAT16_VALUE(-5.215) }, + { { SIMDE_FLOAT16_VALUE(-7.341), SIMDE_FLOAT16_VALUE(-0.189), SIMDE_FLOAT16_VALUE(2.127), SIMDE_FLOAT16_VALUE(7.168) }, + { SIMDE_FLOAT16_VALUE(-3.836), SIMDE_FLOAT16_VALUE(-2.384), SIMDE_FLOAT16_VALUE(1.646), SIMDE_FLOAT16_VALUE(-5.215) } } }, + { { SIMDE_FLOAT16_VALUE(-2.854), SIMDE_FLOAT16_VALUE(-6.178), SIMDE_FLOAT16_VALUE(6.962), SIMDE_FLOAT16_VALUE(2.037), + 
SIMDE_FLOAT16_VALUE(-2.141), SIMDE_FLOAT16_VALUE(1.682), SIMDE_FLOAT16_VALUE(-8.167), SIMDE_FLOAT16_VALUE(-2.859) }, + { { SIMDE_FLOAT16_VALUE(-2.854), SIMDE_FLOAT16_VALUE(6.962), SIMDE_FLOAT16_VALUE(-2.141), SIMDE_FLOAT16_VALUE(-8.167) }, + { SIMDE_FLOAT16_VALUE(-6.178), SIMDE_FLOAT16_VALUE(2.037), SIMDE_FLOAT16_VALUE(1.682), SIMDE_FLOAT16_VALUE(-2.859) } } }, + { { SIMDE_FLOAT16_VALUE(1.069), SIMDE_FLOAT16_VALUE(6.073), SIMDE_FLOAT16_VALUE(-7.430), SIMDE_FLOAT16_VALUE(5.842), + SIMDE_FLOAT16_VALUE(0.741), SIMDE_FLOAT16_VALUE(-7.806), SIMDE_FLOAT16_VALUE(-1.530), SIMDE_FLOAT16_VALUE(5.652) }, + { { SIMDE_FLOAT16_VALUE(1.069), SIMDE_FLOAT16_VALUE(-7.430), SIMDE_FLOAT16_VALUE(0.741), SIMDE_FLOAT16_VALUE(-1.530) }, + { SIMDE_FLOAT16_VALUE(6.073), SIMDE_FLOAT16_VALUE(5.842), SIMDE_FLOAT16_VALUE(-7.806), SIMDE_FLOAT16_VALUE(5.652) } } }, + { { SIMDE_FLOAT16_VALUE(2.665), SIMDE_FLOAT16_VALUE(-0.358), SIMDE_FLOAT16_VALUE(4.267), SIMDE_FLOAT16_VALUE(6.043), + SIMDE_FLOAT16_VALUE(4.497), SIMDE_FLOAT16_VALUE(2.667), SIMDE_FLOAT16_VALUE(7.972), SIMDE_FLOAT16_VALUE(-6.815) }, + { { SIMDE_FLOAT16_VALUE(2.665), SIMDE_FLOAT16_VALUE(4.267), SIMDE_FLOAT16_VALUE(4.497), SIMDE_FLOAT16_VALUE(7.972) }, + { SIMDE_FLOAT16_VALUE(-0.358), SIMDE_FLOAT16_VALUE(6.043), SIMDE_FLOAT16_VALUE(2.667), SIMDE_FLOAT16_VALUE(-6.815) } } }, + { { SIMDE_FLOAT16_VALUE(1.068), SIMDE_FLOAT16_VALUE(3.596), SIMDE_FLOAT16_VALUE(-8.704), SIMDE_FLOAT16_VALUE(5.005), + SIMDE_FLOAT16_VALUE(4.827), SIMDE_FLOAT16_VALUE(4.545), SIMDE_FLOAT16_VALUE(0.047), SIMDE_FLOAT16_VALUE(2.680) }, + { { SIMDE_FLOAT16_VALUE(1.068), SIMDE_FLOAT16_VALUE(-8.704), SIMDE_FLOAT16_VALUE(4.827), SIMDE_FLOAT16_VALUE(0.047) }, + { SIMDE_FLOAT16_VALUE(3.596), SIMDE_FLOAT16_VALUE(5.005), SIMDE_FLOAT16_VALUE(4.545), SIMDE_FLOAT16_VALUE(2.680) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x2_t r = simde_vld2_f16(test_vec[i].a); + + simde_float16x4x2_t expected = { + 
{simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], 1); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], 1); + } + + return 0; +} + static int test_simde_vld2_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1679,6 +1732,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u8) diff --git a/test/arm/neon/ld2_dup.c b/test/arm/neon/ld2_dup.c new file mode 100644 index 000000000..6f2f91706 --- /dev/null +++ b/test/arm/neon/ld2_dup.c @@ -0,0 +1,1315 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld2_dup + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld2_dup.h" + +#if !defined(SIMDE_BUG_INTEL_857088) + +static int +test_simde_vld2_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[2]; + simde_float16_t unused[2]; + simde_float16_t r[2][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(39.04), SIMDE_FLOAT16_VALUE(10.61) }, + { SIMDE_FLOAT16_VALUE(-41.47), SIMDE_FLOAT16_VALUE(28.54)}, + { { SIMDE_FLOAT16_VALUE(39.04), SIMDE_FLOAT16_VALUE(39.04), SIMDE_FLOAT16_VALUE(39.04), SIMDE_FLOAT16_VALUE(39.04) }, + { SIMDE_FLOAT16_VALUE(10.61), SIMDE_FLOAT16_VALUE(10.61), SIMDE_FLOAT16_VALUE(10.61), SIMDE_FLOAT16_VALUE(10.61) } } }, + { { SIMDE_FLOAT16_VALUE(-26.09), SIMDE_FLOAT16_VALUE(-32.09) }, + { SIMDE_FLOAT16_VALUE(-21.60), SIMDE_FLOAT16_VALUE(-5.65)}, + { { SIMDE_FLOAT16_VALUE(-26.09), SIMDE_FLOAT16_VALUE(-26.09), SIMDE_FLOAT16_VALUE(-26.09), SIMDE_FLOAT16_VALUE(-26.09) }, + { SIMDE_FLOAT16_VALUE(-32.09), SIMDE_FLOAT16_VALUE(-32.09), SIMDE_FLOAT16_VALUE(-32.09), SIMDE_FLOAT16_VALUE(-32.09) } } }, + { { SIMDE_FLOAT16_VALUE(27.51), SIMDE_FLOAT16_VALUE(-41.83) }, + { SIMDE_FLOAT16_VALUE(21.45), SIMDE_FLOAT16_VALUE(9.79)}, + { 
{ SIMDE_FLOAT16_VALUE(27.51), SIMDE_FLOAT16_VALUE(27.51), SIMDE_FLOAT16_VALUE(27.51), SIMDE_FLOAT16_VALUE(27.51) }, + { SIMDE_FLOAT16_VALUE(-41.83), SIMDE_FLOAT16_VALUE(-41.83), SIMDE_FLOAT16_VALUE(-41.83), SIMDE_FLOAT16_VALUE(-41.83) } } }, + { { SIMDE_FLOAT16_VALUE(-19.53), SIMDE_FLOAT16_VALUE(-7.90) }, + { SIMDE_FLOAT16_VALUE(-3.62), SIMDE_FLOAT16_VALUE(17.75)}, + { { SIMDE_FLOAT16_VALUE(-19.53), SIMDE_FLOAT16_VALUE(-19.53), SIMDE_FLOAT16_VALUE(-19.53), SIMDE_FLOAT16_VALUE(-19.53) }, + { SIMDE_FLOAT16_VALUE(-7.90), SIMDE_FLOAT16_VALUE(-7.90), SIMDE_FLOAT16_VALUE(-7.90), SIMDE_FLOAT16_VALUE(-7.90) } } }, + { { SIMDE_FLOAT16_VALUE(-22.86), SIMDE_FLOAT16_VALUE(5.30) }, + { SIMDE_FLOAT16_VALUE(-42.71), SIMDE_FLOAT16_VALUE(-2.30)}, + { { SIMDE_FLOAT16_VALUE(-22.86), SIMDE_FLOAT16_VALUE(-22.86), SIMDE_FLOAT16_VALUE(-22.86), SIMDE_FLOAT16_VALUE(-22.86) }, + { SIMDE_FLOAT16_VALUE(5.30), SIMDE_FLOAT16_VALUE(5.30), SIMDE_FLOAT16_VALUE(5.30), SIMDE_FLOAT16_VALUE(5.30) } } }, + { { SIMDE_FLOAT16_VALUE(4.74), SIMDE_FLOAT16_VALUE(43.69) }, + { SIMDE_FLOAT16_VALUE(-49.64), SIMDE_FLOAT16_VALUE(-29.99)}, + { { SIMDE_FLOAT16_VALUE(4.74), SIMDE_FLOAT16_VALUE(4.74), SIMDE_FLOAT16_VALUE(4.74), SIMDE_FLOAT16_VALUE(4.74) }, + { SIMDE_FLOAT16_VALUE(43.69), SIMDE_FLOAT16_VALUE(43.69), SIMDE_FLOAT16_VALUE(43.69), SIMDE_FLOAT16_VALUE(43.69) } } }, + { { SIMDE_FLOAT16_VALUE(-1.69), SIMDE_FLOAT16_VALUE(-25.30) }, + { SIMDE_FLOAT16_VALUE(4.77), SIMDE_FLOAT16_VALUE(22.66)}, + { { SIMDE_FLOAT16_VALUE(-1.69), SIMDE_FLOAT16_VALUE(-1.69), SIMDE_FLOAT16_VALUE(-1.69), SIMDE_FLOAT16_VALUE(-1.69) }, + { SIMDE_FLOAT16_VALUE(-25.30), SIMDE_FLOAT16_VALUE(-25.30), SIMDE_FLOAT16_VALUE(-25.30), SIMDE_FLOAT16_VALUE(-25.30) } } }, + { { SIMDE_FLOAT16_VALUE(45.42), SIMDE_FLOAT16_VALUE(-42.46) }, + { SIMDE_FLOAT16_VALUE(-4.86), SIMDE_FLOAT16_VALUE(42.78)}, + { { SIMDE_FLOAT16_VALUE(45.42), SIMDE_FLOAT16_VALUE(45.42), SIMDE_FLOAT16_VALUE(45.42), SIMDE_FLOAT16_VALUE(45.42) }, + { SIMDE_FLOAT16_VALUE(-42.46), 
SIMDE_FLOAT16_VALUE(-42.46), SIMDE_FLOAT16_VALUE(-42.46), SIMDE_FLOAT16_VALUE(-42.46) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x2_t r = simde_vld2_dup_f16(test_vec[i].a); + simde_float16x4x2_t expected = { + {simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2_dup_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[2]; + float unused[2]; + float r[2][2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(2307.68), SIMDE_FLOAT32_C(-1583.29) }, + { SIMDE_FLOAT32_C(2731.95), SIMDE_FLOAT32_C(2210.99)}, + { { SIMDE_FLOAT32_C(2307.68), SIMDE_FLOAT32_C(2307.68) }, + { SIMDE_FLOAT32_C(-1583.29), SIMDE_FLOAT32_C(-1583.29) } } }, + { { SIMDE_FLOAT32_C(2852.12), SIMDE_FLOAT32_C(-4110.41) }, + { SIMDE_FLOAT32_C(-2600.49), SIMDE_FLOAT32_C(2857.57)}, + { { SIMDE_FLOAT32_C(2852.12), SIMDE_FLOAT32_C(2852.12) }, + { SIMDE_FLOAT32_C(-4110.41), SIMDE_FLOAT32_C(-4110.41) } } }, + { { SIMDE_FLOAT32_C(-3149.95), SIMDE_FLOAT32_C(-4330.61) }, + { SIMDE_FLOAT32_C(-560.25), SIMDE_FLOAT32_C(-1571.36)}, + { { SIMDE_FLOAT32_C(-3149.95), SIMDE_FLOAT32_C(-3149.95) }, + { SIMDE_FLOAT32_C(-4330.61), SIMDE_FLOAT32_C(-4330.61) } } }, + { { SIMDE_FLOAT32_C(3391.61), SIMDE_FLOAT32_C(1571.53) }, + { SIMDE_FLOAT32_C(-2233.86), SIMDE_FLOAT32_C(-4968.04)}, + { { SIMDE_FLOAT32_C(3391.61), SIMDE_FLOAT32_C(3391.61) }, + { SIMDE_FLOAT32_C(1571.53), SIMDE_FLOAT32_C(1571.53) } } }, + { { SIMDE_FLOAT32_C(4895.92), SIMDE_FLOAT32_C(507.81) }, + { SIMDE_FLOAT32_C(-2108.13), SIMDE_FLOAT32_C(-3826.15)}, + { { SIMDE_FLOAT32_C(4895.92), SIMDE_FLOAT32_C(4895.92) }, + { SIMDE_FLOAT32_C(507.81), SIMDE_FLOAT32_C(507.81) } } }, + { { SIMDE_FLOAT32_C(-4259.22), SIMDE_FLOAT32_C(-855.54) }, + { SIMDE_FLOAT32_C(-377.33), 
SIMDE_FLOAT32_C(-2791.55)}, + { { SIMDE_FLOAT32_C(-4259.22), SIMDE_FLOAT32_C(-4259.22) }, + { SIMDE_FLOAT32_C(-855.54), SIMDE_FLOAT32_C(-855.54) } } }, + { { SIMDE_FLOAT32_C(1929.17), SIMDE_FLOAT32_C(-2568.21) }, + { SIMDE_FLOAT32_C(3645.74), SIMDE_FLOAT32_C(4258.92)}, + { { SIMDE_FLOAT32_C(1929.17), SIMDE_FLOAT32_C(1929.17) }, + { SIMDE_FLOAT32_C(-2568.21), SIMDE_FLOAT32_C(-2568.21) } } }, + { { SIMDE_FLOAT32_C(2173.72), SIMDE_FLOAT32_C(-600.35) }, + { SIMDE_FLOAT32_C(-4586.50), SIMDE_FLOAT32_C(2988.62)}, + { { SIMDE_FLOAT32_C(2173.72), SIMDE_FLOAT32_C(2173.72) }, + { SIMDE_FLOAT32_C(-600.35), SIMDE_FLOAT32_C(-600.35) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2x2_t r = simde_vld2_dup_f32(test_vec[i].a); + simde_float32x2x2_t expected = { + {simde_vld1_f32(test_vec[i].r[0]), simde_vld1_f32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2_dup_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[2]; + simde_float64 unused[2]; + simde_float64 r[2][1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(21082.19), SIMDE_FLOAT64_C(47343.37) }, + { SIMDE_FLOAT64_C(49432.61), SIMDE_FLOAT64_C(-21543.65)}, + { { SIMDE_FLOAT64_C(21082.19) }, + { SIMDE_FLOAT64_C(47343.37) } } }, + { { SIMDE_FLOAT64_C(-41608.33), SIMDE_FLOAT64_C(-36883.25) }, + { SIMDE_FLOAT64_C(30575.24), SIMDE_FLOAT64_C(-12584.71)}, + { { SIMDE_FLOAT64_C(-41608.33) }, + { SIMDE_FLOAT64_C(-36883.25) } } }, + { { SIMDE_FLOAT64_C(49759.50), SIMDE_FLOAT64_C(-5992.61) }, + { SIMDE_FLOAT64_C(23231.80), SIMDE_FLOAT64_C(24747.06)}, + { { SIMDE_FLOAT64_C(49759.50) }, + { SIMDE_FLOAT64_C(-5992.61) } } }, + { { SIMDE_FLOAT64_C(40497.30), SIMDE_FLOAT64_C(-28566.42) }, + { SIMDE_FLOAT64_C(43080.67), SIMDE_FLOAT64_C(-27130.92)}, + { { SIMDE_FLOAT64_C(40497.30) }, + { 
SIMDE_FLOAT64_C(-28566.42) } } }, + { { SIMDE_FLOAT64_C(-18240.39), SIMDE_FLOAT64_C(10269.59) }, + { SIMDE_FLOAT64_C(11473.99), SIMDE_FLOAT64_C(13282.75)}, + { { SIMDE_FLOAT64_C(-18240.39) }, + { SIMDE_FLOAT64_C(10269.59) } } }, + { { SIMDE_FLOAT64_C(41195.98), SIMDE_FLOAT64_C(-22372.11) }, + { SIMDE_FLOAT64_C(26718.51), SIMDE_FLOAT64_C(-24527.72)}, + { { SIMDE_FLOAT64_C(41195.98) }, + { SIMDE_FLOAT64_C(-22372.11) } } }, + { { SIMDE_FLOAT64_C(-34223.23), SIMDE_FLOAT64_C(43205.97) }, + { SIMDE_FLOAT64_C(-125.59), SIMDE_FLOAT64_C(1552.92)}, + { { SIMDE_FLOAT64_C(-34223.23) }, + { SIMDE_FLOAT64_C(43205.97) } } }, + { { SIMDE_FLOAT64_C(-47799.32), SIMDE_FLOAT64_C(8831.04) }, + { SIMDE_FLOAT64_C(29486.79), SIMDE_FLOAT64_C(36070.91)}, + { { SIMDE_FLOAT64_C(-47799.32) }, + { SIMDE_FLOAT64_C(8831.04) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1x2_t r = simde_vld2_dup_f64(test_vec[i].a); + simde_float64x1x2_t expected = { + {simde_vld1_f64(test_vec[i].r[0]), simde_vld1_f64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2_dup_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[2]; + int8_t unused[2]; + int8_t r[2][8]; + } test_vec[] = { + { { -INT8_C(44), -INT8_C(10) }, + { INT8_C(11), -INT8_C(46)}, + { { -INT8_C(44), -INT8_C(44), -INT8_C(44), -INT8_C(44), + -INT8_C(44), -INT8_C(44), -INT8_C(44), -INT8_C(44) }, + { -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), + -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10) } } }, + { { -INT8_C(36), INT8_C(0) }, + { INT8_C(3), INT8_C(7)}, + { { -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), + -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36) }, + { INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), + INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0) } } }, + { { INT8_C(41), -INT8_C(47) 
}, + { INT8_C(1), -INT8_C(40)}, + { { INT8_C(41), INT8_C(41), INT8_C(41), INT8_C(41), + INT8_C(41), INT8_C(41), INT8_C(41), INT8_C(41) }, + { -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), + -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47) } } }, + { { INT8_C(40), -INT8_C(26) }, + { -INT8_C(40), INT8_C(1)}, + { { INT8_C(40), INT8_C(40), INT8_C(40), INT8_C(40), + INT8_C(40), INT8_C(40), INT8_C(40), INT8_C(40) }, + { -INT8_C(26), -INT8_C(26), -INT8_C(26), -INT8_C(26), + -INT8_C(26), -INT8_C(26), -INT8_C(26), -INT8_C(26) } } }, + { { INT8_C(27), -INT8_C(41) }, + { -INT8_C(12), -INT8_C(34)}, + { { INT8_C(27), INT8_C(27), INT8_C(27), INT8_C(27), + INT8_C(27), INT8_C(27), INT8_C(27), INT8_C(27) }, + { -INT8_C(41), -INT8_C(41), -INT8_C(41), -INT8_C(41), + -INT8_C(41), -INT8_C(41), -INT8_C(41), -INT8_C(41) } } }, + { { INT8_C(0), INT8_C(10) }, + { -INT8_C(17), INT8_C(42)}, + { { INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), + INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0) }, + { INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), + INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10) } } }, + { { -INT8_C(31), -INT8_C(20) }, + { -INT8_C(18), -INT8_C(32)}, + { { -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), + -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31) }, + { -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), + -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20) } } }, + { { INT8_C(14), -INT8_C(5) }, + { -INT8_C(17), INT8_C(17)}, + { { INT8_C(14), INT8_C(14), INT8_C(14), INT8_C(14), + INT8_C(14), INT8_C(14), INT8_C(14), INT8_C(14) }, + { -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), + -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8x2_t r = simde_vld2_dup_s8(test_vec[i].a); + simde_int8x8x2_t expected = { + {simde_vld1_s8(test_vec[i].r[0]), simde_vld1_s8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i8x8(r.val[0], expected.val[0]); + 
simde_test_arm_neon_assert_equal_i8x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[2]; + int16_t unused[2]; + int16_t r[2][4]; + } test_vec[] = { + { { -INT16_C(4410), INT16_C(4292) }, + { INT16_C(4011), -INT16_C(684)}, + { { -INT16_C(4410), -INT16_C(4410), -INT16_C(4410), -INT16_C(4410) }, + { INT16_C(4292), INT16_C(4292), INT16_C(4292), INT16_C(4292) } } }, + { { -INT16_C(385), INT16_C(937) }, + { -INT16_C(2080), -INT16_C(604)}, + { { -INT16_C(385), -INT16_C(385), -INT16_C(385), -INT16_C(385) }, + { INT16_C(937), INT16_C(937), INT16_C(937), INT16_C(937) } } }, + { { INT16_C(1583), -INT16_C(4297) }, + { -INT16_C(444), INT16_C(2137)}, + { { INT16_C(1583), INT16_C(1583), INT16_C(1583), INT16_C(1583) }, + { -INT16_C(4297), -INT16_C(4297), -INT16_C(4297), -INT16_C(4297) } } }, + { { INT16_C(851), INT16_C(2552) }, + { INT16_C(729), -INT16_C(3035)}, + { { INT16_C(851), INT16_C(851), INT16_C(851), INT16_C(851) }, + { INT16_C(2552), INT16_C(2552), INT16_C(2552), INT16_C(2552) } } }, + { { INT16_C(4380), -INT16_C(3253) }, + { -INT16_C(1679), INT16_C(1203)}, + { { INT16_C(4380), INT16_C(4380), INT16_C(4380), INT16_C(4380) }, + { -INT16_C(3253), -INT16_C(3253), -INT16_C(3253), -INT16_C(3253) } } }, + { { -INT16_C(524), -INT16_C(21) }, + { INT16_C(797), INT16_C(255)}, + { { -INT16_C(524), -INT16_C(524), -INT16_C(524), -INT16_C(524) }, + { -INT16_C(21), -INT16_C(21), -INT16_C(21), -INT16_C(21) } } }, + { { -INT16_C(186), -INT16_C(581) }, + { -INT16_C(3301), -INT16_C(4925)}, + { { -INT16_C(186), -INT16_C(186), -INT16_C(186), -INT16_C(186) }, + { -INT16_C(581), -INT16_C(581), -INT16_C(581), -INT16_C(581) } } }, + { { -INT16_C(1648), INT16_C(2898) }, + { -INT16_C(2578), INT16_C(2614)}, + { { -INT16_C(1648), -INT16_C(1648), -INT16_C(1648), -INT16_C(1648) }, + { INT16_C(2898), INT16_C(2898), INT16_C(2898), INT16_C(2898) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_int16x4x2_t r = simde_vld2_dup_s16(test_vec[i].a); + simde_int16x4x2_t expected = { + {simde_vld1_s16(test_vec[i].r[0]), simde_vld1_s16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t unused[2]; + int32_t r[2][2]; + } test_vec[] = { + { { -INT32_C(423975), INT32_C(232951) }, + { INT32_C(10395), INT32_C(421653)}, + { { -INT32_C(423975), -INT32_C(423975) }, + { INT32_C(232951), INT32_C(232951) } } }, + { { -INT32_C(145011), -INT32_C(212273) }, + { INT32_C(183013), INT32_C(475303)}, + { { -INT32_C(145011), -INT32_C(145011) }, + { -INT32_C(212273), -INT32_C(212273) } } }, + { { INT32_C(58903), -INT32_C(376587) }, + { INT32_C(139539), INT32_C(95681)}, + { { INT32_C(58903), INT32_C(58903) }, + { -INT32_C(376587), -INT32_C(376587) } } }, + { { -INT32_C(192673), INT32_C(65349) }, + { INT32_C(165140), INT32_C(318086)}, + { { -INT32_C(192673), -INT32_C(192673) }, + { INT32_C(65349), INT32_C(65349) } } }, + { { INT32_C(233617), INT32_C(369077) }, + { INT32_C(447975), INT32_C(362104)}, + { { INT32_C(233617), INT32_C(233617) }, + { INT32_C(369077), INT32_C(369077) } } }, + { { -INT32_C(293844), INT32_C(124382) }, + { -INT32_C(131540), -INT32_C(38739)}, + { { -INT32_C(293844), -INT32_C(293844) }, + { INT32_C(124382), INT32_C(124382) } } }, + { { -INT32_C(198129), -INT32_C(404701) }, + { INT32_C(277076), -INT32_C(95884)}, + { { -INT32_C(198129), -INT32_C(198129) }, + { -INT32_C(404701), -INT32_C(404701) } } }, + { { -INT32_C(79259), -INT32_C(145497) }, + { -INT32_C(286896), INT32_C(459476)}, + { { -INT32_C(79259), -INT32_C(79259) }, + { -INT32_C(145497), -INT32_C(145497) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2x2_t r = 
simde_vld2_dup_s32(test_vec[i].a); + simde_int32x2x2_t expected = { + {simde_vld1_s32(test_vec[i].r[0]), simde_vld1_s32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int64_t unused[2]; + int64_t r[2][1]; + } test_vec[] = { + { { -INT64_C(16075735), -INT64_C(41986772) }, + { INT64_C(1992980), -INT64_C(18040045)}, + { { -INT64_C(16075735) }, + { -INT64_C(41986772) } } }, + { { INT64_C(45666736), INT64_C(14603987) }, + { INT64_C(5576751), -INT64_C(32015067)}, + { { INT64_C(45666736) }, + { INT64_C(14603987) } } }, + { { INT64_C(22903463), -INT64_C(26683670) }, + { INT64_C(31335237), -INT64_C(35045895)}, + { { INT64_C(22903463) }, + { -INT64_C(26683670) } } }, + { { -INT64_C(2725423), INT64_C(14936754) }, + { -INT64_C(15271657), -INT64_C(33623481)}, + { { -INT64_C(2725423) }, + { INT64_C(14936754) } } }, + { { -INT64_C(41889667), -INT64_C(48507508) }, + { -INT64_C(40708285), INT64_C(19288415)}, + { { -INT64_C(41889667) }, + { -INT64_C(48507508) } } }, + { { -INT64_C(45608478), INT64_C(30343871) }, + { INT64_C(46346252), -INT64_C(2149923)}, + { { -INT64_C(45608478) }, + { INT64_C(30343871) } } }, + { { INT64_C(17093091), -INT64_C(49483236) }, + { -INT64_C(13243908), -INT64_C(37644879)}, + { { INT64_C(17093091) }, + { -INT64_C(49483236) } } }, + { { -INT64_C(9604042), INT64_C(42837322) }, + { -INT64_C(47932845), INT64_C(29714084)}, + { { -INT64_C(9604042) }, + { INT64_C(42837322) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1x2_t r = simde_vld2_dup_s64(test_vec[i].a); + simde_int64x1x2_t expected = { + {simde_vld1_s64(test_vec[i].r[0]), simde_vld1_s64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); + 
simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[2]; + uint8_t unused[2]; + uint8_t r[2][8]; + } test_vec[] = { + { { UINT8_C(94), UINT8_C(65) }, + { UINT8_C(68), UINT8_C(15)}, + { { UINT8_C(94), UINT8_C(94), UINT8_C(94), UINT8_C(94), + UINT8_C(94), UINT8_C(94), UINT8_C(94), UINT8_C(94) }, + { UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), + UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65) } } }, + { { UINT8_C(89), UINT8_C(88) }, + { UINT8_C(87), UINT8_C(56)}, + { { UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), + UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89) }, + { UINT8_C(88), UINT8_C(88), UINT8_C(88), UINT8_C(88), + UINT8_C(88), UINT8_C(88), UINT8_C(88), UINT8_C(88) } } }, + { { UINT8_C(28), UINT8_C(34) }, + { UINT8_C(99), UINT8_C(65)}, + { { UINT8_C(28), UINT8_C(28), UINT8_C(28), UINT8_C(28), + UINT8_C(28), UINT8_C(28), UINT8_C(28), UINT8_C(28) }, + { UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), + UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34) } } }, + { { UINT8_C(44), UINT8_C(68) }, + { UINT8_C(10), UINT8_C(26)}, + { { UINT8_C(44), UINT8_C(44), UINT8_C(44), UINT8_C(44), + UINT8_C(44), UINT8_C(44), UINT8_C(44), UINT8_C(44) }, + { UINT8_C(68), UINT8_C(68), UINT8_C(68), UINT8_C(68), + UINT8_C(68), UINT8_C(68), UINT8_C(68), UINT8_C(68) } } }, + { { UINT8_C(47), UINT8_C(62) }, + { UINT8_C(86), UINT8_C(81)}, + { { UINT8_C(47), UINT8_C(47), UINT8_C(47), UINT8_C(47), + UINT8_C(47), UINT8_C(47), UINT8_C(47), UINT8_C(47) }, + { UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), + UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62) } } }, + { { UINT8_C(53), UINT8_C(60) }, + { UINT8_C(43), UINT8_C(84)}, + { { UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), + UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53) }, + { UINT8_C(60), UINT8_C(60), UINT8_C(60), UINT8_C(60), + UINT8_C(60), UINT8_C(60), 
UINT8_C(60), UINT8_C(60) } } }, + { { UINT8_C(15), UINT8_C(10) }, + { UINT8_C(26), UINT8_C(7)}, + { { UINT8_C(15), UINT8_C(15), UINT8_C(15), UINT8_C(15), + UINT8_C(15), UINT8_C(15), UINT8_C(15), UINT8_C(15) }, + { UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), + UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10) } } }, + { { UINT8_C(64), UINT8_C(33) }, + { UINT8_C(81), UINT8_C(53)}, + { { UINT8_C(64), UINT8_C(64), UINT8_C(64), UINT8_C(64), + UINT8_C(64), UINT8_C(64), UINT8_C(64), UINT8_C(64) }, + { UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), + UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8x2_t r = simde_vld2_dup_u8(test_vec[i].a); + simde_uint8x8x2_t expected = { + {simde_vld1_u8(test_vec[i].r[0]), simde_vld1_u8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[2]; + uint16_t unused[2]; + uint16_t r[2][4]; + } test_vec[] = { + { { UINT16_C(7277), UINT16_C(6201) }, + { UINT16_C(9680), UINT16_C(3544)}, + { { UINT16_C(7277), UINT16_C(7277), UINT16_C(7277), UINT16_C(7277) }, + { UINT16_C(6201), UINT16_C(6201), UINT16_C(6201), UINT16_C(6201) } } }, + { { UINT16_C(2944), UINT16_C(5592) }, + { UINT16_C(9662), UINT16_C(4267)}, + { { UINT16_C(2944), UINT16_C(2944), UINT16_C(2944), UINT16_C(2944) }, + { UINT16_C(5592), UINT16_C(5592), UINT16_C(5592), UINT16_C(5592) } } }, + { { UINT16_C(8742), UINT16_C(6060) }, + { UINT16_C(3824), UINT16_C(2303)}, + { { UINT16_C(8742), UINT16_C(8742), UINT16_C(8742), UINT16_C(8742) }, + { UINT16_C(6060), UINT16_C(6060), UINT16_C(6060), UINT16_C(6060) } } }, + { { UINT16_C(9964), UINT16_C(5944) }, + { UINT16_C(4767), UINT16_C(7055)}, + { { UINT16_C(9964), UINT16_C(9964), UINT16_C(9964), 
UINT16_C(9964) }, + { UINT16_C(5944), UINT16_C(5944), UINT16_C(5944), UINT16_C(5944) } } }, + { { UINT16_C(6764), UINT16_C(915) }, + { UINT16_C(2455), UINT16_C(4422)}, + { { UINT16_C(6764), UINT16_C(6764), UINT16_C(6764), UINT16_C(6764) }, + { UINT16_C(915), UINT16_C(915), UINT16_C(915), UINT16_C(915) } } }, + { { UINT16_C(3051), UINT16_C(2687) }, + { UINT16_C(3858), UINT16_C(7628)}, + { { UINT16_C(3051), UINT16_C(3051), UINT16_C(3051), UINT16_C(3051) }, + { UINT16_C(2687), UINT16_C(2687), UINT16_C(2687), UINT16_C(2687) } } }, + { { UINT16_C(2343), UINT16_C(2688) }, + { UINT16_C(3196), UINT16_C(4003)}, + { { UINT16_C(2343), UINT16_C(2343), UINT16_C(2343), UINT16_C(2343) }, + { UINT16_C(2688), UINT16_C(2688), UINT16_C(2688), UINT16_C(2688) } } }, + { { UINT16_C(6885), UINT16_C(4075) }, + { UINT16_C(1096), UINT16_C(2437)}, + { { UINT16_C(6885), UINT16_C(6885), UINT16_C(6885), UINT16_C(6885) }, + { UINT16_C(4075), UINT16_C(4075), UINT16_C(4075), UINT16_C(4075) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4x2_t r = simde_vld2_dup_u16(test_vec[i].a); + simde_uint16x4x2_t expected = { + {simde_vld1_u16(test_vec[i].r[0]), simde_vld1_u16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t unused[2]; + uint32_t r[2][2]; + } test_vec[] = { + { { UINT32_C(8806364), UINT32_C(5747175) }, + { UINT32_C(62640), UINT32_C(51477)}, + { { UINT32_C(8806364), UINT32_C(8806364) }, + { UINT32_C(5747175), UINT32_C(5747175) } } }, + { { UINT32_C(6190067), UINT32_C(9926025) }, + { UINT32_C(5527107), UINT32_C(2621754)}, + { { UINT32_C(6190067), UINT32_C(6190067) }, + { UINT32_C(9926025), UINT32_C(9926025) } } }, + { { UINT32_C(7713063), UINT32_C(1058099) }, + { UINT32_C(5449836), 
UINT32_C(7469060)}, + { { UINT32_C(7713063), UINT32_C(7713063) }, + { UINT32_C(1058099), UINT32_C(1058099) } } }, + { { UINT32_C(9037710), UINT32_C(8162655) }, + { UINT32_C(6515501), UINT32_C(8695502)}, + { { UINT32_C(9037710), UINT32_C(9037710) }, + { UINT32_C(8162655), UINT32_C(8162655) } } }, + { { UINT32_C(3473717), UINT32_C(4274463) }, + { UINT32_C(2777529), UINT32_C(4443635)}, + { { UINT32_C(3473717), UINT32_C(3473717) }, + { UINT32_C(4274463), UINT32_C(4274463) } } }, + { { UINT32_C(2223044), UINT32_C(6511474) }, + { UINT32_C(2931518), UINT32_C(3607081)}, + { { UINT32_C(2223044), UINT32_C(2223044) }, + { UINT32_C(6511474), UINT32_C(6511474) } } }, + { { UINT32_C(2577872), UINT32_C(6180197) }, + { UINT32_C(9858974), UINT32_C(1298189)}, + { { UINT32_C(2577872), UINT32_C(2577872) }, + { UINT32_C(6180197), UINT32_C(6180197) } } }, + { { UINT32_C(3511221), UINT32_C(9972720) }, + { UINT32_C(4216133), UINT32_C(7465157)}, + { { UINT32_C(3511221), UINT32_C(3511221) }, + { UINT32_C(9972720), UINT32_C(9972720) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2x2_t r = simde_vld2_dup_u32(test_vec[i].a); + simde_uint32x2x2_t expected = { + {simde_vld1_u32(test_vec[i].r[0]), simde_vld1_u32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint64_t unused[2]; + uint64_t r[2][1]; + } test_vec[] = { + { { UINT64_C(406306774), UINT64_C(832079054) }, + { UINT64_C(942691710), UINT64_C(590244517)}, + { { UINT64_C(406306774) }, + { UINT64_C(832079054) } } }, + { { UINT64_C(209815625), UINT64_C(142491912) }, + { UINT64_C(346572474), UINT64_C(762924904)}, + { { UINT64_C(209815625) }, + { UINT64_C(142491912) } } }, + { { UINT64_C(329793009), UINT64_C(224060866) }, + { UINT64_C(648830943), 
UINT64_C(251128938)}, + { { UINT64_C(329793009) }, + { UINT64_C(224060866) } } }, + { { UINT64_C(353732428), UINT64_C(38041495) }, + { UINT64_C(142075049), UINT64_C(500056775)}, + { { UINT64_C(353732428) }, + { UINT64_C(38041495) } } }, + { { UINT64_C(396112989), UINT64_C(999174752) }, + { UINT64_C(69563384), UINT64_C(343798648)}, + { { UINT64_C(396112989) }, + { UINT64_C(999174752) } } }, + { { UINT64_C(321475295), UINT64_C(362221548) }, + { UINT64_C(851265818), UINT64_C(259349005)}, + { { UINT64_C(321475295) }, + { UINT64_C(362221548) } } }, + { { UINT64_C(424982958), UINT64_C(188017536) }, + { UINT64_C(162916083), UINT64_C(282151345)}, + { { UINT64_C(424982958) }, + { UINT64_C(188017536) } } }, + { { UINT64_C(49743777), UINT64_C(694072886) }, + { UINT64_C(998695309), UINT64_C(891182183)}, + { { UINT64_C(49743777) }, + { UINT64_C(694072886) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1x2_t r = simde_vld2_dup_u64(test_vec[i].a); + simde_uint64x1x2_t expected = { + {simde_vld1_u64(test_vec[i].r[0]), simde_vld1_u64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[2]; + simde_float16_t unused[2]; + simde_float16_t r[2][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(24.16) }, + { SIMDE_FLOAT16_VALUE(-44.99), SIMDE_FLOAT16_VALUE(0.06)}, + { { SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(44.20), + SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(44.20), SIMDE_FLOAT16_VALUE(44.20) }, + { SIMDE_FLOAT16_VALUE(24.16), SIMDE_FLOAT16_VALUE(24.16), SIMDE_FLOAT16_VALUE(24.16), SIMDE_FLOAT16_VALUE(24.16), + SIMDE_FLOAT16_VALUE(24.16), SIMDE_FLOAT16_VALUE(24.16), 
SIMDE_FLOAT16_VALUE(24.16), SIMDE_FLOAT16_VALUE(24.16) } } }, + { { SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(2.99) }, + { SIMDE_FLOAT16_VALUE(-44.29), SIMDE_FLOAT16_VALUE(-47.80)}, + { { SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(49.42), + SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(49.42), SIMDE_FLOAT16_VALUE(49.42) }, + { SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(2.99), + SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(2.99), SIMDE_FLOAT16_VALUE(2.99) } } }, + { { SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-21.75) }, + { SIMDE_FLOAT16_VALUE(18.84), SIMDE_FLOAT16_VALUE(47.44)}, + { { SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-4.29), + SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-4.29), SIMDE_FLOAT16_VALUE(-4.29) }, + { SIMDE_FLOAT16_VALUE(-21.75), SIMDE_FLOAT16_VALUE(-21.75), SIMDE_FLOAT16_VALUE(-21.75), SIMDE_FLOAT16_VALUE(-21.75), + SIMDE_FLOAT16_VALUE(-21.75), SIMDE_FLOAT16_VALUE(-21.75), SIMDE_FLOAT16_VALUE(-21.75), SIMDE_FLOAT16_VALUE(-21.75) } } }, + { { SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(32.44) }, + { SIMDE_FLOAT16_VALUE(32.42), SIMDE_FLOAT16_VALUE(-11.78)}, + { { SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(14.92), + SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(14.92), SIMDE_FLOAT16_VALUE(14.92) }, + { SIMDE_FLOAT16_VALUE(32.44), SIMDE_FLOAT16_VALUE(32.44), SIMDE_FLOAT16_VALUE(32.44), SIMDE_FLOAT16_VALUE(32.44), + SIMDE_FLOAT16_VALUE(32.44), SIMDE_FLOAT16_VALUE(32.44), SIMDE_FLOAT16_VALUE(32.44), SIMDE_FLOAT16_VALUE(32.44) } } }, + { { SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-22.71) }, + { SIMDE_FLOAT16_VALUE(21.86), SIMDE_FLOAT16_VALUE(-1.79)}, + { { 
SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-15.04), + SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-15.04), SIMDE_FLOAT16_VALUE(-15.04) }, + { SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(-22.71), + SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(-22.71) } } }, + { { SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(23.53) }, + { SIMDE_FLOAT16_VALUE(-46.16), SIMDE_FLOAT16_VALUE(23.15)}, + { { SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(2.97), + SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(2.97) }, + { SIMDE_FLOAT16_VALUE(23.53), SIMDE_FLOAT16_VALUE(23.53), SIMDE_FLOAT16_VALUE(23.53), SIMDE_FLOAT16_VALUE(23.53), + SIMDE_FLOAT16_VALUE(23.53), SIMDE_FLOAT16_VALUE(23.53), SIMDE_FLOAT16_VALUE(23.53), SIMDE_FLOAT16_VALUE(23.53) } } }, + { { SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(-32.92) }, + { SIMDE_FLOAT16_VALUE(24.32), SIMDE_FLOAT16_VALUE(39.39)}, + { { SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(9.52), + SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(9.52), SIMDE_FLOAT16_VALUE(9.52) }, + { SIMDE_FLOAT16_VALUE(-32.92), SIMDE_FLOAT16_VALUE(-32.92), SIMDE_FLOAT16_VALUE(-32.92), SIMDE_FLOAT16_VALUE(-32.92), + SIMDE_FLOAT16_VALUE(-32.92), SIMDE_FLOAT16_VALUE(-32.92), SIMDE_FLOAT16_VALUE(-32.92), SIMDE_FLOAT16_VALUE(-32.92) } } }, + { { SIMDE_FLOAT16_VALUE(-26.46), SIMDE_FLOAT16_VALUE(24.31) }, + { SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(-40.92)}, + { { SIMDE_FLOAT16_VALUE(-26.46), SIMDE_FLOAT16_VALUE(-26.46), SIMDE_FLOAT16_VALUE(-26.46), SIMDE_FLOAT16_VALUE(-26.46), + SIMDE_FLOAT16_VALUE(-26.46), SIMDE_FLOAT16_VALUE(-26.46), SIMDE_FLOAT16_VALUE(-26.46), 
SIMDE_FLOAT16_VALUE(-26.46) }, + { SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(24.31), + SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(24.31) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x2_t r = simde_vld2q_dup_f16(test_vec[i].a); + simde_float16x8x2_t expected = { + {simde_vld1q_f16(test_vec[i].r[0]), simde_vld1q_f16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2q_dup_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[2]; + float unused[2]; + float r[2][4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-36.86), SIMDE_FLOAT32_C(-3235.75) }, + { SIMDE_FLOAT32_C(-1620.77), SIMDE_FLOAT32_C(-2598.23)}, + { { SIMDE_FLOAT32_C(-36.86), SIMDE_FLOAT32_C(-36.86), SIMDE_FLOAT32_C(-36.86), SIMDE_FLOAT32_C(-36.86) }, + { SIMDE_FLOAT32_C(-3235.75), SIMDE_FLOAT32_C(-3235.75), SIMDE_FLOAT32_C(-3235.75), SIMDE_FLOAT32_C(-3235.75) } } }, + { { SIMDE_FLOAT32_C(-22.58), SIMDE_FLOAT32_C(-1940.17) }, + { SIMDE_FLOAT32_C(404.34), SIMDE_FLOAT32_C(2212.34)}, + { { SIMDE_FLOAT32_C(-22.58), SIMDE_FLOAT32_C(-22.58), SIMDE_FLOAT32_C(-22.58), SIMDE_FLOAT32_C(-22.58) }, + { SIMDE_FLOAT32_C(-1940.17), SIMDE_FLOAT32_C(-1940.17), SIMDE_FLOAT32_C(-1940.17), SIMDE_FLOAT32_C(-1940.17) } } }, + { { SIMDE_FLOAT32_C(1792.73), SIMDE_FLOAT32_C(3138.56) }, + { SIMDE_FLOAT32_C(-468.64), SIMDE_FLOAT32_C(-4526.15)}, + { { SIMDE_FLOAT32_C(1792.73), SIMDE_FLOAT32_C(1792.73), SIMDE_FLOAT32_C(1792.73), SIMDE_FLOAT32_C(1792.73) }, + { SIMDE_FLOAT32_C(3138.56), SIMDE_FLOAT32_C(3138.56), SIMDE_FLOAT32_C(3138.56), SIMDE_FLOAT32_C(3138.56) } } }, + { { SIMDE_FLOAT32_C(1849.53), SIMDE_FLOAT32_C(-3910.03) }, + { SIMDE_FLOAT32_C(-4396.76), 
SIMDE_FLOAT32_C(-3095.12)}, + { { SIMDE_FLOAT32_C(1849.53), SIMDE_FLOAT32_C(1849.53), SIMDE_FLOAT32_C(1849.53), SIMDE_FLOAT32_C(1849.53) }, + { SIMDE_FLOAT32_C(-3910.03), SIMDE_FLOAT32_C(-3910.03), SIMDE_FLOAT32_C(-3910.03), SIMDE_FLOAT32_C(-3910.03) } } }, + { { SIMDE_FLOAT32_C(1942.57), SIMDE_FLOAT32_C(-2284.97) }, + { SIMDE_FLOAT32_C(2327.82), SIMDE_FLOAT32_C(700.88)}, + { { SIMDE_FLOAT32_C(1942.57), SIMDE_FLOAT32_C(1942.57), SIMDE_FLOAT32_C(1942.57), SIMDE_FLOAT32_C(1942.57) }, + { SIMDE_FLOAT32_C(-2284.97), SIMDE_FLOAT32_C(-2284.97), SIMDE_FLOAT32_C(-2284.97), SIMDE_FLOAT32_C(-2284.97) } } }, + { { SIMDE_FLOAT32_C(2044.12), SIMDE_FLOAT32_C(636.75) }, + { SIMDE_FLOAT32_C(1647.09), SIMDE_FLOAT32_C(105.34)}, + { { SIMDE_FLOAT32_C(2044.12), SIMDE_FLOAT32_C(2044.12), SIMDE_FLOAT32_C(2044.12), SIMDE_FLOAT32_C(2044.12) }, + { SIMDE_FLOAT32_C(636.75), SIMDE_FLOAT32_C(636.75), SIMDE_FLOAT32_C(636.75), SIMDE_FLOAT32_C(636.75) } } }, + { { SIMDE_FLOAT32_C(4347.49), SIMDE_FLOAT32_C(1671.95) }, + { SIMDE_FLOAT32_C(-2453.27), SIMDE_FLOAT32_C(-3619.39)}, + { { SIMDE_FLOAT32_C(4347.49), SIMDE_FLOAT32_C(4347.49), SIMDE_FLOAT32_C(4347.49), SIMDE_FLOAT32_C(4347.49) }, + { SIMDE_FLOAT32_C(1671.95), SIMDE_FLOAT32_C(1671.95), SIMDE_FLOAT32_C(1671.95), SIMDE_FLOAT32_C(1671.95) } } }, + { { SIMDE_FLOAT32_C(-4696.01), SIMDE_FLOAT32_C(2725.12) }, + { SIMDE_FLOAT32_C(4041.30), SIMDE_FLOAT32_C(3596.24)}, + { { SIMDE_FLOAT32_C(-4696.01), SIMDE_FLOAT32_C(-4696.01), SIMDE_FLOAT32_C(-4696.01), SIMDE_FLOAT32_C(-4696.01) }, + { SIMDE_FLOAT32_C(2725.12), SIMDE_FLOAT32_C(2725.12), SIMDE_FLOAT32_C(2725.12), SIMDE_FLOAT32_C(2725.12) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4x2_t r = simde_vld2q_dup_f32(test_vec[i].a); + simde_float32x4x2_t expected = { + {simde_vld1q_f32(test_vec[i].r[0]), simde_vld1q_f32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); + 
simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2q_dup_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[2]; + simde_float64 unused[2]; + simde_float64 r[2][2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-6318.09), SIMDE_FLOAT64_C(38696.63) }, + { SIMDE_FLOAT64_C(-27977.77), SIMDE_FLOAT64_C(-12787.81)}, + { { SIMDE_FLOAT64_C(-6318.09), SIMDE_FLOAT64_C(-6318.09) }, + { SIMDE_FLOAT64_C(38696.63), SIMDE_FLOAT64_C(38696.63) } } }, + { { SIMDE_FLOAT64_C(-21625.08), SIMDE_FLOAT64_C(26911.10) }, + { SIMDE_FLOAT64_C(33404.93), SIMDE_FLOAT64_C(33236.51)}, + { { SIMDE_FLOAT64_C(-21625.08), SIMDE_FLOAT64_C(-21625.08) }, + { SIMDE_FLOAT64_C(26911.10), SIMDE_FLOAT64_C(26911.10) } } }, + { { SIMDE_FLOAT64_C(-35341.89), SIMDE_FLOAT64_C(-4671.89) }, + { SIMDE_FLOAT64_C(-24439.53), SIMDE_FLOAT64_C(-25942.73)}, + { { SIMDE_FLOAT64_C(-35341.89), SIMDE_FLOAT64_C(-35341.89) }, + { SIMDE_FLOAT64_C(-4671.89), SIMDE_FLOAT64_C(-4671.89) } } }, + { { SIMDE_FLOAT64_C(23971.09), SIMDE_FLOAT64_C(43928.74) }, + { SIMDE_FLOAT64_C(-4938.80), SIMDE_FLOAT64_C(39585.23)}, + { { SIMDE_FLOAT64_C(23971.09), SIMDE_FLOAT64_C(23971.09) }, + { SIMDE_FLOAT64_C(43928.74), SIMDE_FLOAT64_C(43928.74) } } }, + { { SIMDE_FLOAT64_C(29978.96), SIMDE_FLOAT64_C(34924.24) }, + { SIMDE_FLOAT64_C(-11437.93), SIMDE_FLOAT64_C(-33530.62)}, + { { SIMDE_FLOAT64_C(29978.96), SIMDE_FLOAT64_C(29978.96) }, + { SIMDE_FLOAT64_C(34924.24), SIMDE_FLOAT64_C(34924.24) } } }, + { { SIMDE_FLOAT64_C(44018.29), SIMDE_FLOAT64_C(33685.76) }, + { SIMDE_FLOAT64_C(-13974.59), SIMDE_FLOAT64_C(-14748.18)}, + { { SIMDE_FLOAT64_C(44018.29), SIMDE_FLOAT64_C(44018.29) }, + { SIMDE_FLOAT64_C(33685.76), SIMDE_FLOAT64_C(33685.76) } } }, + { { SIMDE_FLOAT64_C(-14298.09), SIMDE_FLOAT64_C(-13821.65) }, + { SIMDE_FLOAT64_C(-5506.59), SIMDE_FLOAT64_C(7420.16)}, + { { SIMDE_FLOAT64_C(-14298.09), SIMDE_FLOAT64_C(-14298.09) }, + { SIMDE_FLOAT64_C(-13821.65), 
SIMDE_FLOAT64_C(-13821.65) } } }, + { { SIMDE_FLOAT64_C(-9654.44), SIMDE_FLOAT64_C(40543.01) }, + { SIMDE_FLOAT64_C(29651.86), SIMDE_FLOAT64_C(16915.17)}, + { { SIMDE_FLOAT64_C(-9654.44), SIMDE_FLOAT64_C(-9654.44) }, + { SIMDE_FLOAT64_C(40543.01), SIMDE_FLOAT64_C(40543.01) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2x2_t r = simde_vld2q_dup_f64(test_vec[i].a); + simde_float64x2x2_t expected = { + {simde_vld1q_f64(test_vec[i].r[0]), simde_vld1q_f64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2q_dup_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[2]; + int8_t unused[2]; + int8_t r[2][16]; + } test_vec[] = { + { { INT8_C(21), -INT8_C(48) }, + { INT8_C(32), -INT8_C(39)}, + { { INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), + INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21) }, + { -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), + -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48), -INT8_C(48) } } }, + { { INT8_C(3), INT8_C(43) }, + { INT8_C(49), -INT8_C(35)}, + { { INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), + INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3), INT8_C(3) }, + { INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), + INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43), INT8_C(43) } } }, + { { INT8_C(12), -INT8_C(47) }, + { -INT8_C(17), INT8_C(2)}, + { { INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), + INT8_C(12), 
INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12) }, + { -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), + -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47), -INT8_C(47) } } }, + { { -INT8_C(31), -INT8_C(27) }, + { INT8_C(34), -INT8_C(28)}, + { { -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), + -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31) }, + { -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), + -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27) } } }, + { { -INT8_C(21), -INT8_C(17) }, + { INT8_C(30), -INT8_C(40)}, + { { -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), + -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21), -INT8_C(21) }, + { -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), + -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17), -INT8_C(17) } } }, + { { INT8_C(26), -INT8_C(31) }, + { INT8_C(18), INT8_C(14)}, + { { INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), + INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26), INT8_C(26) }, + { -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), + -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31) } } }, + { { INT8_C(47), -INT8_C(31) }, + { -INT8_C(7), INT8_C(20)}, + { { INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), + INT8_C(47), INT8_C(47), INT8_C(47), 
INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47) }, + { -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), + -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31) } } }, + { { INT8_C(42), -INT8_C(10) }, + { -INT8_C(38), INT8_C(37)}, + { { INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), + INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42), INT8_C(42) }, + { -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), + -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16x2_t r = simde_vld2q_dup_s8(test_vec[i].a); + simde_int8x16x2_t expected = { + {simde_vld1q_s8(test_vec[i].r[0]), simde_vld1q_s8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[2]; + int16_t unused[2]; + int16_t r[2][8]; + } test_vec[] = { + { { INT16_C(2407), -INT16_C(2601) }, + { -INT16_C(3548), INT16_C(1088)}, + { { INT16_C(2407), INT16_C(2407), INT16_C(2407), INT16_C(2407), + INT16_C(2407), INT16_C(2407), INT16_C(2407), INT16_C(2407) }, + { -INT16_C(2601), -INT16_C(2601), -INT16_C(2601), -INT16_C(2601), + -INT16_C(2601), -INT16_C(2601), -INT16_C(2601), -INT16_C(2601) } } }, + { { -INT16_C(4900), INT16_C(3927) }, + { -INT16_C(2400), -INT16_C(4330)}, + { { -INT16_C(4900), -INT16_C(4900), -INT16_C(4900), -INT16_C(4900), + -INT16_C(4900), -INT16_C(4900), -INT16_C(4900), -INT16_C(4900) }, + { INT16_C(3927), INT16_C(3927), INT16_C(3927), INT16_C(3927), + INT16_C(3927), INT16_C(3927), 
INT16_C(3927), INT16_C(3927) } } }, + { { -INT16_C(564), -INT16_C(1118) }, + { INT16_C(1031), -INT16_C(151)}, + { { -INT16_C(564), -INT16_C(564), -INT16_C(564), -INT16_C(564), + -INT16_C(564), -INT16_C(564), -INT16_C(564), -INT16_C(564) }, + { -INT16_C(1118), -INT16_C(1118), -INT16_C(1118), -INT16_C(1118), + -INT16_C(1118), -INT16_C(1118), -INT16_C(1118), -INT16_C(1118) } } }, + { { INT16_C(1703), INT16_C(2374) }, + { -INT16_C(4899), -INT16_C(3132)}, + { { INT16_C(1703), INT16_C(1703), INT16_C(1703), INT16_C(1703), + INT16_C(1703), INT16_C(1703), INT16_C(1703), INT16_C(1703) }, + { INT16_C(2374), INT16_C(2374), INT16_C(2374), INT16_C(2374), + INT16_C(2374), INT16_C(2374), INT16_C(2374), INT16_C(2374) } } }, + { { INT16_C(4278), INT16_C(4104) }, + { -INT16_C(2252), INT16_C(2469)}, + { { INT16_C(4278), INT16_C(4278), INT16_C(4278), INT16_C(4278), + INT16_C(4278), INT16_C(4278), INT16_C(4278), INT16_C(4278) }, + { INT16_C(4104), INT16_C(4104), INT16_C(4104), INT16_C(4104), + INT16_C(4104), INT16_C(4104), INT16_C(4104), INT16_C(4104) } } }, + { { -INT16_C(1164), -INT16_C(4939) }, + { -INT16_C(3858), INT16_C(4548)}, + { { -INT16_C(1164), -INT16_C(1164), -INT16_C(1164), -INT16_C(1164), + -INT16_C(1164), -INT16_C(1164), -INT16_C(1164), -INT16_C(1164) }, + { -INT16_C(4939), -INT16_C(4939), -INT16_C(4939), -INT16_C(4939), + -INT16_C(4939), -INT16_C(4939), -INT16_C(4939), -INT16_C(4939) } } }, + { { -INT16_C(3397), INT16_C(3662) }, + { INT16_C(4517), -INT16_C(1655)}, + { { -INT16_C(3397), -INT16_C(3397), -INT16_C(3397), -INT16_C(3397), + -INT16_C(3397), -INT16_C(3397), -INT16_C(3397), -INT16_C(3397) }, + { INT16_C(3662), INT16_C(3662), INT16_C(3662), INT16_C(3662), + INT16_C(3662), INT16_C(3662), INT16_C(3662), INT16_C(3662) } } }, + { { -INT16_C(3636), INT16_C(1082) }, + { -INT16_C(3186), INT16_C(4513)}, + { { -INT16_C(3636), -INT16_C(3636), -INT16_C(3636), -INT16_C(3636), + -INT16_C(3636), -INT16_C(3636), -INT16_C(3636), -INT16_C(3636) }, + { INT16_C(1082), INT16_C(1082), 
INT16_C(1082), INT16_C(1082), + INT16_C(1082), INT16_C(1082), INT16_C(1082), INT16_C(1082) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8x2_t r = simde_vld2q_dup_s16(test_vec[i].a); + simde_int16x8x2_t expected = { + {simde_vld1q_s16(test_vec[i].r[0]), simde_vld1q_s16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t unused[2]; + int32_t r[2][4]; + } test_vec[] = { + { { -INT32_C(320215), -INT32_C(128824) }, + { -INT32_C(84416), INT32_C(173924)}, + { { -INT32_C(320215), -INT32_C(320215), -INT32_C(320215), -INT32_C(320215) }, + { -INT32_C(128824), -INT32_C(128824), -INT32_C(128824), -INT32_C(128824) } } }, + { { -INT32_C(352719), -INT32_C(124530) }, + { INT32_C(469272), -INT32_C(259438)}, + { { -INT32_C(352719), -INT32_C(352719), -INT32_C(352719), -INT32_C(352719) }, + { -INT32_C(124530), -INT32_C(124530), -INT32_C(124530), -INT32_C(124530) } } }, + { { INT32_C(26840), -INT32_C(67706) }, + { INT32_C(306244), -INT32_C(394994)}, + { { INT32_C(26840), INT32_C(26840), INT32_C(26840), INT32_C(26840) }, + { -INT32_C(67706), -INT32_C(67706), -INT32_C(67706), -INT32_C(67706) } } }, + { { INT32_C(386819), -INT32_C(52375) }, + { -INT32_C(221686), INT32_C(458634)}, + { { INT32_C(386819), INT32_C(386819), INT32_C(386819), INT32_C(386819) }, + { -INT32_C(52375), -INT32_C(52375), -INT32_C(52375), -INT32_C(52375) } } }, + { { INT32_C(71692), -INT32_C(6908) }, + { -INT32_C(241460), INT32_C(376961)}, + { { INT32_C(71692), INT32_C(71692), INT32_C(71692), INT32_C(71692) }, + { -INT32_C(6908), -INT32_C(6908), -INT32_C(6908), -INT32_C(6908) } } }, + { { -INT32_C(153484), INT32_C(220056) }, + { INT32_C(497409), -INT32_C(429618)}, + { { -INT32_C(153484), -INT32_C(153484), 
-INT32_C(153484), -INT32_C(153484) }, + { INT32_C(220056), INT32_C(220056), INT32_C(220056), INT32_C(220056) } } }, + { { INT32_C(445458), -INT32_C(422069) }, + { -INT32_C(473699), -INT32_C(169167)}, + { { INT32_C(445458), INT32_C(445458), INT32_C(445458), INT32_C(445458) }, + { -INT32_C(422069), -INT32_C(422069), -INT32_C(422069), -INT32_C(422069) } } }, + { { INT32_C(142131), -INT32_C(97847) }, + { -INT32_C(394989), -INT32_C(267336)}, + { { INT32_C(142131), INT32_C(142131), INT32_C(142131), INT32_C(142131) }, + { -INT32_C(97847), -INT32_C(97847), -INT32_C(97847), -INT32_C(97847) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4x2_t r = simde_vld2q_dup_s32(test_vec[i].a); + simde_int32x4x2_t expected = { + {simde_vld1q_s32(test_vec[i].r[0]), simde_vld1q_s32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int64_t unused[2]; + int64_t r[2][2]; + } test_vec[] = { + { { -INT64_C(15235051), -INT64_C(30008301) }, + { INT64_C(31964111), -INT64_C(24641971)}, + { { -INT64_C(15235051), -INT64_C(15235051) }, + { -INT64_C(30008301), -INT64_C(30008301) } } }, + { { -INT64_C(16181149), INT64_C(3841670) }, + { -INT64_C(27973863), INT64_C(39259391)}, + { { -INT64_C(16181149), -INT64_C(16181149) }, + { INT64_C(3841670), INT64_C(3841670) } } }, + { { INT64_C(48064682), INT64_C(41855966) }, + { -INT64_C(10570224), INT64_C(38329774)}, + { { INT64_C(48064682), INT64_C(48064682) }, + { INT64_C(41855966), INT64_C(41855966) } } }, + { { -INT64_C(90740), INT64_C(37361034) }, + { INT64_C(44464818), INT64_C(15207495)}, + { { -INT64_C(90740), -INT64_C(90740) }, + { INT64_C(37361034), INT64_C(37361034) } } }, + { { INT64_C(21453155), INT64_C(22061973) }, + { -INT64_C(31158130), INT64_C(38690033)}, + 
{ { INT64_C(21453155), INT64_C(21453155) }, + { INT64_C(22061973), INT64_C(22061973) } } }, + { { -INT64_C(21071153), -INT64_C(25469385) }, + { INT64_C(14847780), INT64_C(9714022)}, + { { -INT64_C(21071153), -INT64_C(21071153) }, + { -INT64_C(25469385), -INT64_C(25469385) } } }, + { { -INT64_C(21428279), -INT64_C(33446233) }, + { INT64_C(33284182), -INT64_C(519364)}, + { { -INT64_C(21428279), -INT64_C(21428279) }, + { -INT64_C(33446233), -INT64_C(33446233) } } }, + { { INT64_C(40007009), INT64_C(23627817) }, + { INT64_C(29955083), INT64_C(41266286)}, + { { INT64_C(40007009), INT64_C(40007009) }, + { INT64_C(23627817), INT64_C(23627817) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2x2_t r = simde_vld2q_dup_s64(test_vec[i].a); + simde_int64x2x2_t expected = { + {simde_vld1q_s64(test_vec[i].r[0]), simde_vld1q_s64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[2]; + uint8_t unused[2]; + uint8_t r[2][16]; + } test_vec[] = { + { { UINT8_C(96), UINT8_C(23) }, + { UINT8_C(44), UINT8_C(91)}, + { { UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), + UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96) }, + { UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), + UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23) } } }, + { { UINT8_C(53), UINT8_C(10) }, + { UINT8_C(65), UINT8_C(22)}, + { { UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), + UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), 
UINT8_C(53) }, + { UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), + UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10) } } }, + { { UINT8_C(83), UINT8_C(3) }, + { UINT8_C(50), UINT8_C(69)}, + { { UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), + UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83) }, + { UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), + UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3) } } }, + { { UINT8_C(48), UINT8_C(62) }, + { UINT8_C(64), UINT8_C(80)}, + { { UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), + UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48) }, + { UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), + UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62), UINT8_C(62) } } }, + { { UINT8_C(33), UINT8_C(39) }, + { UINT8_C(0), UINT8_C(10)}, + { { UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), + UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33) }, + { UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), + UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39), UINT8_C(39) } } }, + { { UINT8_C(53), UINT8_C(22) }, + { UINT8_C(79), UINT8_C(9)}, + { { UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), + UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), 
UINT8_C(53) }, + { UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), + UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22) } } }, + { { UINT8_C(54), UINT8_C(10) }, + { UINT8_C(3), UINT8_C(83)}, + { { UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), + UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54), UINT8_C(54) }, + { UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), + UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10), UINT8_C(10) } } }, + { { UINT8_C(9), UINT8_C(80) }, + { UINT8_C(74), UINT8_C(55)}, + { { UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), + UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9) }, + { UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), + UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16x2_t r = simde_vld2q_dup_u8(test_vec[i].a); + simde_uint8x16x2_t expected = { + {simde_vld1q_u8(test_vec[i].r[0]), simde_vld1q_u8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[2]; + uint16_t unused[2]; + uint16_t r[2][8]; + } test_vec[] = { + { { UINT16_C(6058), UINT16_C(6273) }, + { UINT16_C(6301), UINT16_C(7423)}, + { { UINT16_C(6058), UINT16_C(6058), UINT16_C(6058), UINT16_C(6058), + UINT16_C(6058), UINT16_C(6058), UINT16_C(6058), 
UINT16_C(6058) }, + { UINT16_C(6273), UINT16_C(6273), UINT16_C(6273), UINT16_C(6273), + UINT16_C(6273), UINT16_C(6273), UINT16_C(6273), UINT16_C(6273) } } }, + { { UINT16_C(7176), UINT16_C(4578) }, + { UINT16_C(9644), UINT16_C(9994)}, + { { UINT16_C(7176), UINT16_C(7176), UINT16_C(7176), UINT16_C(7176), + UINT16_C(7176), UINT16_C(7176), UINT16_C(7176), UINT16_C(7176) }, + { UINT16_C(4578), UINT16_C(4578), UINT16_C(4578), UINT16_C(4578), + UINT16_C(4578), UINT16_C(4578), UINT16_C(4578), UINT16_C(4578) } } }, + { { UINT16_C(7902), UINT16_C(1902) }, + { UINT16_C(7534), UINT16_C(3814)}, + { { UINT16_C(7902), UINT16_C(7902), UINT16_C(7902), UINT16_C(7902), + UINT16_C(7902), UINT16_C(7902), UINT16_C(7902), UINT16_C(7902) }, + { UINT16_C(1902), UINT16_C(1902), UINT16_C(1902), UINT16_C(1902), + UINT16_C(1902), UINT16_C(1902), UINT16_C(1902), UINT16_C(1902) } } }, + { { UINT16_C(8498), UINT16_C(3391) }, + { UINT16_C(2384), UINT16_C(829)}, + { { UINT16_C(8498), UINT16_C(8498), UINT16_C(8498), UINT16_C(8498), + UINT16_C(8498), UINT16_C(8498), UINT16_C(8498), UINT16_C(8498) }, + { UINT16_C(3391), UINT16_C(3391), UINT16_C(3391), UINT16_C(3391), + UINT16_C(3391), UINT16_C(3391), UINT16_C(3391), UINT16_C(3391) } } }, + { { UINT16_C(5757), UINT16_C(7453) }, + { UINT16_C(3497), UINT16_C(4212)}, + { { UINT16_C(5757), UINT16_C(5757), UINT16_C(5757), UINT16_C(5757), + UINT16_C(5757), UINT16_C(5757), UINT16_C(5757), UINT16_C(5757) }, + { UINT16_C(7453), UINT16_C(7453), UINT16_C(7453), UINT16_C(7453), + UINT16_C(7453), UINT16_C(7453), UINT16_C(7453), UINT16_C(7453) } } }, + { { UINT16_C(2420), UINT16_C(5818) }, + { UINT16_C(6753), UINT16_C(5749)}, + { { UINT16_C(2420), UINT16_C(2420), UINT16_C(2420), UINT16_C(2420), + UINT16_C(2420), UINT16_C(2420), UINT16_C(2420), UINT16_C(2420) }, + { UINT16_C(5818), UINT16_C(5818), UINT16_C(5818), UINT16_C(5818), + UINT16_C(5818), UINT16_C(5818), UINT16_C(5818), UINT16_C(5818) } } }, + { { UINT16_C(3143), UINT16_C(6838) }, + { UINT16_C(8323), 
UINT16_C(2227)}, + { { UINT16_C(3143), UINT16_C(3143), UINT16_C(3143), UINT16_C(3143), + UINT16_C(3143), UINT16_C(3143), UINT16_C(3143), UINT16_C(3143) }, + { UINT16_C(6838), UINT16_C(6838), UINT16_C(6838), UINT16_C(6838), + UINT16_C(6838), UINT16_C(6838), UINT16_C(6838), UINT16_C(6838) } } }, + { { UINT16_C(6156), UINT16_C(8714) }, + { UINT16_C(9846), UINT16_C(2394)}, + { { UINT16_C(6156), UINT16_C(6156), UINT16_C(6156), UINT16_C(6156), + UINT16_C(6156), UINT16_C(6156), UINT16_C(6156), UINT16_C(6156) }, + { UINT16_C(8714), UINT16_C(8714), UINT16_C(8714), UINT16_C(8714), + UINT16_C(8714), UINT16_C(8714), UINT16_C(8714), UINT16_C(8714) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8x2_t r = simde_vld2q_dup_u16(test_vec[i].a); + simde_uint16x8x2_t expected = { + {simde_vld1q_u16(test_vec[i].r[0]), simde_vld1q_u16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t unused[2]; + uint32_t r[2][4]; + } test_vec[] = { + { { UINT32_C(536106), UINT32_C(877508) }, + { UINT32_C(4679840), UINT32_C(2116123)}, + { { UINT32_C(536106), UINT32_C(536106), UINT32_C(536106), UINT32_C(536106) }, + { UINT32_C(877508), UINT32_C(877508), UINT32_C(877508), UINT32_C(877508) } } }, + { { UINT32_C(2745092), UINT32_C(2480848) }, + { UINT32_C(6021702), UINT32_C(5477534)}, + { { UINT32_C(2745092), UINT32_C(2745092), UINT32_C(2745092), UINT32_C(2745092) }, + { UINT32_C(2480848), UINT32_C(2480848), UINT32_C(2480848), UINT32_C(2480848) } } }, + { { UINT32_C(4890262), UINT32_C(9837780) }, + { UINT32_C(2908038), UINT32_C(8114742)}, + { { UINT32_C(4890262), UINT32_C(4890262), UINT32_C(4890262), UINT32_C(4890262) }, + { UINT32_C(9837780), UINT32_C(9837780), UINT32_C(9837780), UINT32_C(9837780) } } }, 
+ { { UINT32_C(6798984), UINT32_C(3169873) }, + { UINT32_C(5423024), UINT32_C(204472)}, + { { UINT32_C(6798984), UINT32_C(6798984), UINT32_C(6798984), UINT32_C(6798984) }, + { UINT32_C(3169873), UINT32_C(3169873), UINT32_C(3169873), UINT32_C(3169873) } } }, + { { UINT32_C(5642096), UINT32_C(2938870) }, + { UINT32_C(6387732), UINT32_C(8027323)}, + { { UINT32_C(5642096), UINT32_C(5642096), UINT32_C(5642096), UINT32_C(5642096) }, + { UINT32_C(2938870), UINT32_C(2938870), UINT32_C(2938870), UINT32_C(2938870) } } }, + { { UINT32_C(992304), UINT32_C(4886731) }, + { UINT32_C(6516958), UINT32_C(303121)}, + { { UINT32_C(992304), UINT32_C(992304), UINT32_C(992304), UINT32_C(992304) }, + { UINT32_C(4886731), UINT32_C(4886731), UINT32_C(4886731), UINT32_C(4886731) } } }, + { { UINT32_C(5616151), UINT32_C(6074604) }, + { UINT32_C(2135742), UINT32_C(2607255)}, + { { UINT32_C(5616151), UINT32_C(5616151), UINT32_C(5616151), UINT32_C(5616151) }, + { UINT32_C(6074604), UINT32_C(6074604), UINT32_C(6074604), UINT32_C(6074604) } } }, + { { UINT32_C(5796703), UINT32_C(4718965) }, + { UINT32_C(8894279), UINT32_C(5823775)}, + { { UINT32_C(5796703), UINT32_C(5796703), UINT32_C(5796703), UINT32_C(5796703) }, + { UINT32_C(4718965), UINT32_C(4718965), UINT32_C(4718965), UINT32_C(4718965) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4x2_t r = simde_vld2q_dup_u32(test_vec[i].a); + simde_uint32x4x2_t expected = { + {simde_vld1q_u32(test_vec[i].r[0]), simde_vld1q_u32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint64_t unused[2]; + uint64_t r[2][2]; + } test_vec[] = { + { { UINT64_C(706048852), UINT64_C(383443850) }, + { UINT64_C(285705054), UINT64_C(822983505)}, + { { UINT64_C(706048852), 
UINT64_C(706048852) }, + { UINT64_C(383443850), UINT64_C(383443850) } } }, + { { UINT64_C(421014862), UINT64_C(906802850) }, + { UINT64_C(817754491), UINT64_C(29691523)}, + { { UINT64_C(421014862), UINT64_C(421014862) }, + { UINT64_C(906802850), UINT64_C(906802850) } } }, + { { UINT64_C(221353308), UINT64_C(322577141) }, + { UINT64_C(995600522), UINT64_C(297974478)}, + { { UINT64_C(221353308), UINT64_C(221353308) }, + { UINT64_C(322577141), UINT64_C(322577141) } } }, + { { UINT64_C(663578487), UINT64_C(286997839) }, + { UINT64_C(243168275), UINT64_C(577857697)}, + { { UINT64_C(663578487), UINT64_C(663578487) }, + { UINT64_C(286997839), UINT64_C(286997839) } } }, + { { UINT64_C(593025173), UINT64_C(964665381) }, + { UINT64_C(503395467), UINT64_C(204960526)}, + { { UINT64_C(593025173), UINT64_C(593025173) }, + { UINT64_C(964665381), UINT64_C(964665381) } } }, + { { UINT64_C(962157997), UINT64_C(598569861) }, + { UINT64_C(705698189), UINT64_C(932957687)}, + { { UINT64_C(962157997), UINT64_C(962157997) }, + { UINT64_C(598569861), UINT64_C(598569861) } } }, + { { UINT64_C(486609766), UINT64_C(401817412) }, + { UINT64_C(317463497), UINT64_C(368423896)}, + { { UINT64_C(486609766), UINT64_C(486609766) }, + { UINT64_C(401817412), UINT64_C(401817412) } } }, + { { UINT64_C(603061297), UINT64_C(721346221) }, + { UINT64_C(633243906), UINT64_C(434342421)}, + { { UINT64_C(603061297), UINT64_C(603061297) }, + { UINT64_C(721346221), UINT64_C(721346221) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2x2_t r = simde_vld2q_dup_u64(test_vec[i].a); + simde_uint64x2x2_t expected = { + {simde_vld1q_u64(test_vec[i].r[0]), simde_vld1q_u64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); + } + + return 0; +} + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_TEST_FUNC_LIST_BEGIN +#if !defined(SIMDE_BUG_INTEL_857088) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_dup_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_dup_u64) +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld2_lane.c b/test/arm/neon/ld2_lane.c new file mode 100644 index 000000000..99ed72fac --- /dev/null +++ b/test/arm/neon/ld2_lane.c @@ -0,0 +1,1449 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld2_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld2_lane.h" +#include "../../../simde/arm/neon/ld1.h" +#include "../../../simde/arm/neon/ld2.h" + +static int +test_simde_vld2_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t src[2][8]; + int8_t buf[2]; + int8_t r[2][8]; + } test_vec[] = { + { { { -INT8_C(16), INT8_C(44), -INT8_C(6), INT8_C(49), + -INT8_C(48), -INT8_C(22), INT8_C(4), INT8_C(6) }, + { -INT8_C(7), INT8_C(40), -INT8_C(17), INT8_C(15), + -INT8_C(21), -INT8_C(4), INT8_C(14), -INT8_C(25) } }, + { INT8_C(43), -INT8_C(26)}, + { { INT8_C(43), INT8_C(44), -INT8_C(6), INT8_C(49), + -INT8_C(48), -INT8_C(22), INT8_C(4), INT8_C(6) }, + { -INT8_C(26), INT8_C(40), -INT8_C(17), INT8_C(15), + -INT8_C(21), 
-INT8_C(4), INT8_C(14), -INT8_C(25) } } }, + { { { INT8_C(0), -INT8_C(47), INT8_C(9), INT8_C(28), + -INT8_C(8), -INT8_C(39), -INT8_C(37), INT8_C(35) }, + { INT8_C(38), INT8_C(35), INT8_C(27), -INT8_C(16), + INT8_C(14), -INT8_C(1), -INT8_C(16), INT8_C(5) } }, + { -INT8_C(48), INT8_C(10)}, + { { INT8_C(0), -INT8_C(48), INT8_C(9), INT8_C(28), + -INT8_C(8), -INT8_C(39), -INT8_C(37), INT8_C(35) }, + { INT8_C(38), INT8_C(10), INT8_C(27), -INT8_C(16), + INT8_C(14), -INT8_C(1), -INT8_C(16), INT8_C(5) } } }, + { { { -INT8_C(45), -INT8_C(48), -INT8_C(30), -INT8_C(12), + INT8_C(2), INT8_C(21), INT8_C(16), INT8_C(39) }, + { -INT8_C(3), INT8_C(30), -INT8_C(37), -INT8_C(12), + INT8_C(17), INT8_C(23), -INT8_C(41), INT8_C(18) } }, + { -INT8_C(30), -INT8_C(13)}, + { { -INT8_C(45), -INT8_C(48), -INT8_C(30), -INT8_C(12), + INT8_C(2), INT8_C(21), INT8_C(16), INT8_C(39) }, + { -INT8_C(3), INT8_C(30), -INT8_C(13), -INT8_C(12), + INT8_C(17), INT8_C(23), -INT8_C(41), INT8_C(18) } } }, + { { { -INT8_C(9), -INT8_C(39), INT8_C(49), -INT8_C(10), + -INT8_C(45), INT8_C(24), -INT8_C(17), INT8_C(30) }, + { -INT8_C(36), INT8_C(20), -INT8_C(15), -INT8_C(21), + -INT8_C(23), INT8_C(22), -INT8_C(39), -INT8_C(18) } }, + { INT8_C(13), -INT8_C(37)}, + { { -INT8_C(9), -INT8_C(39), INT8_C(49), INT8_C(13), + -INT8_C(45), INT8_C(24), -INT8_C(17), INT8_C(30) }, + { -INT8_C(36), INT8_C(20), -INT8_C(15), -INT8_C(37), + -INT8_C(23), INT8_C(22), -INT8_C(39), -INT8_C(18) } } }, + { { { INT8_C(17), INT8_C(17), -INT8_C(11), INT8_C(43), + -INT8_C(20), -INT8_C(10), INT8_C(44), INT8_C(43) }, + { INT8_C(23), -INT8_C(4), -INT8_C(32), -INT8_C(2), + -INT8_C(39), -INT8_C(1), -INT8_C(37), INT8_C(42) } }, + { -INT8_C(40), INT8_C(34)}, + { { INT8_C(17), INT8_C(17), -INT8_C(11), INT8_C(43), + -INT8_C(40), -INT8_C(10), INT8_C(44), INT8_C(43) }, + { INT8_C(23), -INT8_C(4), -INT8_C(32), -INT8_C(2), + INT8_C(34), -INT8_C(1), -INT8_C(37), INT8_C(42) } } }, + { { { INT8_C(16), INT8_C(35), -INT8_C(33), -INT8_C(19), + INT8_C(15), 
-INT8_C(17), INT8_C(13), -INT8_C(49) }, + { INT8_C(21), -INT8_C(24), -INT8_C(3), INT8_C(10), + -INT8_C(31), INT8_C(24), -INT8_C(46), -INT8_C(14) } }, + { -INT8_C(24), -INT8_C(3)}, + { { INT8_C(16), INT8_C(35), -INT8_C(33), -INT8_C(19), + INT8_C(15), -INT8_C(24), INT8_C(13), -INT8_C(49) }, + { INT8_C(21), -INT8_C(24), -INT8_C(3), INT8_C(10), + -INT8_C(31), -INT8_C(3), -INT8_C(46), -INT8_C(14) } } }, + { { { INT8_C(25), -INT8_C(9), -INT8_C(16), INT8_C(33), + -INT8_C(14), -INT8_C(45), INT8_C(0), -INT8_C(45) }, + { -INT8_C(40), INT8_C(36), INT8_C(38), -INT8_C(48), + -INT8_C(20), -INT8_C(22), INT8_C(1), -INT8_C(43) } }, + { -INT8_C(33), -INT8_C(50)}, + { { INT8_C(25), -INT8_C(9), -INT8_C(16), INT8_C(33), + -INT8_C(14), -INT8_C(45), -INT8_C(33), -INT8_C(45) }, + { -INT8_C(40), INT8_C(36), INT8_C(38), -INT8_C(48), + -INT8_C(20), -INT8_C(22), -INT8_C(50), -INT8_C(43) } } }, + { { { INT8_C(38), -INT8_C(16), INT8_C(32), -INT8_C(48), + INT8_C(28), -INT8_C(6), -INT8_C(48), -INT8_C(36) }, + { INT8_C(12), INT8_C(7), -INT8_C(12), -INT8_C(12), + INT8_C(33), INT8_C(28), INT8_C(26), -INT8_C(43) } }, + { -INT8_C(42), -INT8_C(7)}, + { { INT8_C(38), -INT8_C(16), INT8_C(32), -INT8_C(48), + INT8_C(28), -INT8_C(6), -INT8_C(48), -INT8_C(42) }, + { INT8_C(12), INT8_C(7), -INT8_C(12), -INT8_C(12), + INT8_C(33), INT8_C(28), INT8_C(26), -INT8_C(7) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8x2_t r, src, expected; + src.val[0] = simde_vld1_s8(test_vec[i].src[0]); + src.val[1] = simde_vld1_s8(test_vec[i].src[1]); + + SIMDE_CONSTIFY_8_(simde_vld2_lane_s8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_s8(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s8(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { 
+ static const struct { + int16_t src[2][4]; + int16_t buf[2]; + int16_t r[2][4]; + } test_vec[] = { + { { { INT16_C(124), INT16_C(412), -INT16_C(312), -INT16_C(107) }, + { INT16_C(349), INT16_C(217), INT16_C(241), INT16_C(95) } }, + { -INT16_C(275), INT16_C(317)}, + { { -INT16_C(275), INT16_C(412), -INT16_C(312), -INT16_C(107) }, + { INT16_C(317), INT16_C(217), INT16_C(241), INT16_C(95) } } }, + { { { INT16_C(378), INT16_C(448), -INT16_C(60), -INT16_C(474) }, + { INT16_C(266), -INT16_C(135), INT16_C(422), INT16_C(223) } }, + { INT16_C(297), INT16_C(5)}, + { { INT16_C(378), INT16_C(297), -INT16_C(60), -INT16_C(474) }, + { INT16_C(266), INT16_C(5), INT16_C(422), INT16_C(223) } } }, + { { { -INT16_C(197), INT16_C(3), -INT16_C(245), INT16_C(218) }, + { -INT16_C(216), -INT16_C(262), INT16_C(180), -INT16_C(60) } }, + { -INT16_C(96), INT16_C(492)}, + { { -INT16_C(197), INT16_C(3), -INT16_C(96), INT16_C(218) }, + { -INT16_C(216), -INT16_C(262), INT16_C(492), -INT16_C(60) } } }, + { { { -INT16_C(395), INT16_C(374), -INT16_C(448), -INT16_C(445) }, + { INT16_C(60), INT16_C(486), -INT16_C(283), INT16_C(298) } }, + { INT16_C(218), INT16_C(225)}, + { { -INT16_C(395), INT16_C(374), -INT16_C(448), INT16_C(218) }, + { INT16_C(60), INT16_C(486), -INT16_C(283), INT16_C(225) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4x2_t r, src, expected; + src.val[0] = simde_vld1_s16(test_vec[i].src[0]); + src.val[1] = simde_vld1_s16(test_vec[i].src[1]); + + SIMDE_CONSTIFY_4_(simde_vld2_lane_s16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_s16(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s16(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t src[2][2]; + int32_t 
buf[2]; + int32_t r[2][2]; + } test_vec[] = { + { { { -INT32_C(617), INT32_C(1303) }, + { -INT32_C(4779), -INT32_C(4756) } }, + { INT32_C(4193), -INT32_C(4032)}, + { { INT32_C(4193), INT32_C(1303) }, + { -INT32_C(4032), -INT32_C(4756) } } }, + { { { -INT32_C(1012), -INT32_C(3240) }, + { -INT32_C(3259), -INT32_C(3396) } }, + { -INT32_C(3438), -INT32_C(693)}, + { { -INT32_C(1012), -INT32_C(3438) }, + { -INT32_C(3259), -INT32_C(693) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2x2_t r, src, expected; + src.val[0] = simde_vld1_s32(test_vec[i].src[0]); + src.val[1] = simde_vld1_s32(test_vec[i].src[1]); + /* Row i of test_vec expects lane i to be replaced, so the loop counter doubles as the lane index. NOTE(review): SIMDE_CONSTIFY_2_ presumably turns the runtime i into a constant lane argument -- confirm against its definition. */ + SIMDE_CONSTIFY_2_(simde_vld2_lane_s32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_s32(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s32(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); + } + + return 0; +} + /* Test for simde_vld2_lane_s64. Each vector of the pair holds a single int64_t, so lane 0 is the only lane exercised. The expected result r is src with lane 0 of val[0] and val[1] replaced by buf[0] and buf[1] respectively. Returns 0 once every row has been compared. */ +static int +test_simde_vld2_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t src[2][1]; + int64_t buf[2]; + int64_t r[2][1]; + } test_vec[] = { + { { { -INT64_C(382464207) }, + { -INT64_C(391984763) } }, + { -INT64_C(283052241), INT64_C(176704794)}, + { { -INT64_C(283052241) }, + { INT64_C(176704794) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1x2_t r, src, expected; + src.val[0] = simde_vld1_s64(test_vec[i].src[0]); + src.val[1] = simde_vld1_s64(test_vec[i].src[1]); + /* The lane is passed as the literal 0 here; unlike the wider variants, no SIMDE_CONSTIFY_* wrapper is used. */ + r = simde_vld2_lane_s64(test_vec[i].buf, src, 0); + + expected.val[0] = simde_vld1_s64(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s64(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); + } + + return 0; +} + + +static int +test_simde_vld2_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const
struct { + uint8_t src[2][8]; + uint8_t buf[2]; + uint8_t r[2][8]; + } test_vec[] = { + { { { UINT8_C(16), UINT8_C(44), UINT8_C(6), UINT8_C(49), + UINT8_C(48), UINT8_C(22), UINT8_C(4), UINT8_C(6) }, + { UINT8_C(7), UINT8_C(40), UINT8_C(17), UINT8_C(15), + UINT8_C(21), UINT8_C(4), UINT8_C(14), UINT8_C(25) } }, + { UINT8_C(43), UINT8_C(26)}, + { { UINT8_C(43), UINT8_C(44), UINT8_C(6), UINT8_C(49), + UINT8_C(48), UINT8_C(22), UINT8_C(4), UINT8_C(6) }, + { UINT8_C(26), UINT8_C(40), UINT8_C(17), UINT8_C(15), + UINT8_C(21), UINT8_C(4), UINT8_C(14), UINT8_C(25) } } }, + { { { UINT8_C(0), UINT8_C(47), UINT8_C(9), UINT8_C(28), + UINT8_C(8), UINT8_C(39), UINT8_C(37), UINT8_C(35) }, + { UINT8_C(38), UINT8_C(35), UINT8_C(27), UINT8_C(16), + UINT8_C(14), UINT8_C(1), UINT8_C(16), UINT8_C(5) } }, + { UINT8_C(48), UINT8_C(10)}, + { { UINT8_C(0), UINT8_C(48), UINT8_C(9), UINT8_C(28), + UINT8_C(8), UINT8_C(39), UINT8_C(37), UINT8_C(35) }, + { UINT8_C(38), UINT8_C(10), UINT8_C(27), UINT8_C(16), + UINT8_C(14), UINT8_C(1), UINT8_C(16), UINT8_C(5) } } }, + { { { UINT8_C(45), UINT8_C(48), UINT8_C(30), UINT8_C(12), + UINT8_C(2), UINT8_C(21), UINT8_C(16), UINT8_C(39) }, + { UINT8_C(3), UINT8_C(30), UINT8_C(37), UINT8_C(12), + UINT8_C(17), UINT8_C(23), UINT8_C(41), UINT8_C(18) } }, + { UINT8_C(30), UINT8_C(13)}, + { { UINT8_C(45), UINT8_C(48), UINT8_C(30), UINT8_C(12), + UINT8_C(2), UINT8_C(21), UINT8_C(16), UINT8_C(39) }, + { UINT8_C(3), UINT8_C(30), UINT8_C(13), UINT8_C(12), + UINT8_C(17), UINT8_C(23), UINT8_C(41), UINT8_C(18) } } }, + { { { UINT8_C(9), UINT8_C(39), UINT8_C(49), UINT8_C(10), + UINT8_C(45), UINT8_C(24), UINT8_C(17), UINT8_C(30) }, + { UINT8_C(36), UINT8_C(20), UINT8_C(15), UINT8_C(21), + UINT8_C(23), UINT8_C(22), UINT8_C(39), UINT8_C(18) } }, + { UINT8_C(13), UINT8_C(37)}, + { { UINT8_C(9), UINT8_C(39), UINT8_C(49), UINT8_C(13), + UINT8_C(45), UINT8_C(24), UINT8_C(17), UINT8_C(30) }, + { UINT8_C(36), UINT8_C(20), UINT8_C(15), UINT8_C(37), + UINT8_C(23), UINT8_C(22), 
UINT8_C(39), UINT8_C(18) } } }, + { { { UINT8_C(17), UINT8_C(17), UINT8_C(11), UINT8_C(43), + UINT8_C(20), UINT8_C(10), UINT8_C(44), UINT8_C(43) }, + { UINT8_C(23), UINT8_C(4), UINT8_C(32), UINT8_C(2), + UINT8_C(39), UINT8_C(1), UINT8_C(37), UINT8_C(42) } }, + { UINT8_C(40), UINT8_C(34)}, + { { UINT8_C(17), UINT8_C(17), UINT8_C(11), UINT8_C(43), + UINT8_C(40), UINT8_C(10), UINT8_C(44), UINT8_C(43) }, + { UINT8_C(23), UINT8_C(4), UINT8_C(32), UINT8_C(2), + UINT8_C(34), UINT8_C(1), UINT8_C(37), UINT8_C(42) } } }, + { { { UINT8_C(16), UINT8_C(35), UINT8_C(33), UINT8_C(19), + UINT8_C(15), UINT8_C(17), UINT8_C(13), UINT8_C(49) }, + { UINT8_C(21), UINT8_C(24), UINT8_C(3), UINT8_C(10), + UINT8_C(31), UINT8_C(24), UINT8_C(46), UINT8_C(14) } }, + { UINT8_C(24), UINT8_C(3)}, + { { UINT8_C(16), UINT8_C(35), UINT8_C(33), UINT8_C(19), + UINT8_C(15), UINT8_C(24), UINT8_C(13), UINT8_C(49) }, + { UINT8_C(21), UINT8_C(24), UINT8_C(3), UINT8_C(10), + UINT8_C(31), UINT8_C(3), UINT8_C(46), UINT8_C(14) } } }, + { { { UINT8_C(25), UINT8_C(9), UINT8_C(16), UINT8_C(33), + UINT8_C(14), UINT8_C(45), UINT8_C(0), UINT8_C(45) }, + { UINT8_C(40), UINT8_C(36), UINT8_C(38), UINT8_C(48), + UINT8_C(20), UINT8_C(22), UINT8_C(1), UINT8_C(43) } }, + { UINT8_C(33), UINT8_C(50)}, + { { UINT8_C(25), UINT8_C(9), UINT8_C(16), UINT8_C(33), + UINT8_C(14), UINT8_C(45), UINT8_C(33), UINT8_C(45) }, + { UINT8_C(40), UINT8_C(36), UINT8_C(38), UINT8_C(48), + UINT8_C(20), UINT8_C(22), UINT8_C(50), UINT8_C(43) } } }, + { { { UINT8_C(38), UINT8_C(16), UINT8_C(32), UINT8_C(48), + UINT8_C(28), UINT8_C(6), UINT8_C(48), UINT8_C(36) }, + { UINT8_C(12), UINT8_C(7), UINT8_C(12), UINT8_C(12), + UINT8_C(33), UINT8_C(28), UINT8_C(26), UINT8_C(43) } }, + { UINT8_C(42), UINT8_C(7)}, + { { UINT8_C(38), UINT8_C(16), UINT8_C(32), UINT8_C(48), + UINT8_C(28), UINT8_C(6), UINT8_C(48), UINT8_C(42) }, + { UINT8_C(12), UINT8_C(7), UINT8_C(12), UINT8_C(12), + UINT8_C(33), UINT8_C(28), UINT8_C(26), UINT8_C(7) } } }, + }; + + for (size_t i 
= 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8x2_t r, src, expected; + src.val[0] = simde_vld1_u8(test_vec[i].src[0]); + src.val[1] = simde_vld1_u8(test_vec[i].src[1]); + + SIMDE_CONSTIFY_8_(simde_vld2_lane_u8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_u8(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u8(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t src[2][4]; + uint16_t buf[2]; + uint16_t r[2][4]; + } test_vec[] = { + { { { UINT16_C(124), UINT16_C(412), UINT16_C(312), UINT16_C(107) }, + { UINT16_C(349), UINT16_C(217), UINT16_C(241), UINT16_C(95) } }, + { UINT16_C(275), UINT16_C(317)}, + { { UINT16_C(275), UINT16_C(412), UINT16_C(312), UINT16_C(107) }, + { UINT16_C(317), UINT16_C(217), UINT16_C(241), UINT16_C(95) } } }, + { { { UINT16_C(378), UINT16_C(448), UINT16_C(60), UINT16_C(474) }, + { UINT16_C(266), UINT16_C(135), UINT16_C(422), UINT16_C(223) } }, + { UINT16_C(297), UINT16_C(5)}, + { { UINT16_C(378), UINT16_C(297), UINT16_C(60), UINT16_C(474) }, + { UINT16_C(266), UINT16_C(5), UINT16_C(422), UINT16_C(223) } } }, + { { { UINT16_C(197), UINT16_C(3), UINT16_C(245), UINT16_C(218) }, + { UINT16_C(216), UINT16_C(262), UINT16_C(180), UINT16_C(60) } }, + { UINT16_C(96), UINT16_C(492)}, + { { UINT16_C(197), UINT16_C(3), UINT16_C(96), UINT16_C(218) }, + { UINT16_C(216), UINT16_C(262), UINT16_C(492), UINT16_C(60) } } }, + { { { UINT16_C(395), UINT16_C(374), UINT16_C(448), UINT16_C(445) }, + { UINT16_C(60), UINT16_C(486), UINT16_C(283), UINT16_C(298) } }, + { UINT16_C(218), UINT16_C(225)}, + { { UINT16_C(395), UINT16_C(374), UINT16_C(448), UINT16_C(218) }, + { UINT16_C(60), UINT16_C(486), UINT16_C(283), UINT16_C(225) } } }, + }; + + for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4x2_t r, src, expected; + src.val[0] = simde_vld1_u16(test_vec[i].src[0]); + src.val[1] = simde_vld1_u16(test_vec[i].src[1]); + + SIMDE_CONSTIFY_4_(simde_vld2_lane_u16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_u16(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u16(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); + } + + return 0; +} + /* Test for simde_vld2_lane_u32. test_vec has one row per lane of a two-element vector: row i expects lane i of val[0] and val[1] to be replaced by buf[0] and buf[1], with the other lane left exactly as in src. Returns 0 once every row has been compared. */ +static int +test_simde_vld2_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t src[2][2]; + uint32_t buf[2]; + uint32_t r[2][2]; + } test_vec[] = { + { { { UINT32_C(617), UINT32_C(1303) }, + { UINT32_C(4779), UINT32_C(4756) } }, + { UINT32_C(4193), UINT32_C(4032)}, + { { UINT32_C(4193), UINT32_C(1303) }, + { UINT32_C(4032), UINT32_C(4756) } } }, + { { { UINT32_C(1012), UINT32_C(3240) }, + { UINT32_C(3259), UINT32_C(3396) } }, + { UINT32_C(3438), UINT32_C(693)}, + { { UINT32_C(1012), UINT32_C(3438) }, + { UINT32_C(3259), UINT32_C(693) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2x2_t r, src, expected; + src.val[0] = simde_vld1_u32(test_vec[i].src[0]); + src.val[1] = simde_vld1_u32(test_vec[i].src[1]); + /* The loop counter is used as the lane index, matching the row layout of test_vec. NOTE(review): SIMDE_CONSTIFY_2_ presumably dispatches on i so the lane reaches the intrinsic as a constant, with the HEDLEY_UNREACHABLE() expression as the fallback for an out-of-range i -- confirm against the macro definition. */ + SIMDE_CONSTIFY_2_(simde_vld2_lane_u32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_u32(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u32(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t src[2][1]; + uint64_t buf[2]; + uint64_t r[2][1]; + } test_vec[] = { + { { { UINT64_C(382464207) }, + { UINT64_C(391984763) } }, + { UINT64_C(283052241),
UINT64_C(176704794)}, + { { UINT64_C(283052241) }, + { UINT64_C(176704794) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1x2_t r, src, expected; + src.val[0] = simde_vld1_u64(test_vec[i].src[0]); + src.val[1] = simde_vld1_u64(test_vec[i].src[1]); + + r = simde_vld2_lane_u64(test_vec[i].buf, src, 0); + + expected.val[0] = simde_vld1_u64(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u64(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[2][4]; + simde_float16_t buf[2]; + simde_float16_t r[2][4]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE(-45.80), SIMDE_FLOAT16_VALUE(29.77), SIMDE_FLOAT16_VALUE(44.10), SIMDE_FLOAT16_VALUE(-17.29) }, + { SIMDE_FLOAT16_VALUE(-34.30), SIMDE_FLOAT16_VALUE(-46.41), SIMDE_FLOAT16_VALUE(-43.15), SIMDE_FLOAT16_VALUE(-30.34) } }, + { SIMDE_FLOAT16_VALUE(-16.64), SIMDE_FLOAT16_VALUE(-13.70)}, + { { SIMDE_FLOAT16_VALUE(-16.64), SIMDE_FLOAT16_VALUE(29.77), SIMDE_FLOAT16_VALUE(44.10), SIMDE_FLOAT16_VALUE(-17.29) }, + { SIMDE_FLOAT16_VALUE(-13.70), SIMDE_FLOAT16_VALUE(-46.41), SIMDE_FLOAT16_VALUE(-43.15), SIMDE_FLOAT16_VALUE(-30.34) } } }, + { { { SIMDE_FLOAT16_VALUE(-31.49), SIMDE_FLOAT16_VALUE(-3.02), SIMDE_FLOAT16_VALUE(4.73), SIMDE_FLOAT16_VALUE(29.19) }, + { SIMDE_FLOAT16_VALUE(-6.51), SIMDE_FLOAT16_VALUE(-6.97), SIMDE_FLOAT16_VALUE(-24.86), SIMDE_FLOAT16_VALUE(20.04) } }, + { SIMDE_FLOAT16_VALUE(4.78), SIMDE_FLOAT16_VALUE(-24.76)}, + { { SIMDE_FLOAT16_VALUE(-31.49), SIMDE_FLOAT16_VALUE(4.78), SIMDE_FLOAT16_VALUE(4.73), SIMDE_FLOAT16_VALUE(29.19) }, + { SIMDE_FLOAT16_VALUE(-6.51), SIMDE_FLOAT16_VALUE(-24.76), SIMDE_FLOAT16_VALUE(-24.86), SIMDE_FLOAT16_VALUE(20.04) } } }, + { { { SIMDE_FLOAT16_VALUE(-33.05), SIMDE_FLOAT16_VALUE(44.60), 
SIMDE_FLOAT16_VALUE(44.07), SIMDE_FLOAT16_VALUE(-42.37) }, + { SIMDE_FLOAT16_VALUE(45.53), SIMDE_FLOAT16_VALUE(4.23), SIMDE_FLOAT16_VALUE(13.47), SIMDE_FLOAT16_VALUE(2.19) } }, + { SIMDE_FLOAT16_VALUE(-14.88), SIMDE_FLOAT16_VALUE(-15.37)}, + { { SIMDE_FLOAT16_VALUE(-33.05), SIMDE_FLOAT16_VALUE(44.60), SIMDE_FLOAT16_VALUE(-14.88), SIMDE_FLOAT16_VALUE(-42.37) }, + { SIMDE_FLOAT16_VALUE(45.53), SIMDE_FLOAT16_VALUE(4.23), SIMDE_FLOAT16_VALUE(-15.37), SIMDE_FLOAT16_VALUE(2.19) } } }, + { { { SIMDE_FLOAT16_VALUE(22.74), SIMDE_FLOAT16_VALUE(-1.07), SIMDE_FLOAT16_VALUE(48.37), SIMDE_FLOAT16_VALUE(26.68) }, + { SIMDE_FLOAT16_VALUE(-34.05), SIMDE_FLOAT16_VALUE(-26.07), SIMDE_FLOAT16_VALUE(-44.28), SIMDE_FLOAT16_VALUE(6.92) } }, + { SIMDE_FLOAT16_VALUE(-28.27), SIMDE_FLOAT16_VALUE(-30.31)}, + { { SIMDE_FLOAT16_VALUE(22.74), SIMDE_FLOAT16_VALUE(-1.07), SIMDE_FLOAT16_VALUE(48.37), SIMDE_FLOAT16_VALUE(-28.27) }, + { SIMDE_FLOAT16_VALUE(-34.05), SIMDE_FLOAT16_VALUE(-26.07), SIMDE_FLOAT16_VALUE(-44.28), SIMDE_FLOAT16_VALUE(-30.31) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x2_t r, src, expected; + src.val[0] = simde_vld1_f16(test_vec[i].src[0]); + src.val[1] = simde_vld1_f16(test_vec[i].src[1]); + + SIMDE_CONSTIFY_4_(simde_vld2_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_f16(test_vec[i].r[0]); + expected.val[1] = simde_vld1_f16(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t src[2][2]; + simde_float32_t buf[2]; + simde_float32_t r[2][2]; + } test_vec[] = { + { { { SIMDE_FLOAT32_C(1651.66), SIMDE_FLOAT32_C(4335.15) }, + { SIMDE_FLOAT32_C(3302.66), SIMDE_FLOAT32_C(2212.82) } }, + { SIMDE_FLOAT32_C(-293.68), 
SIMDE_FLOAT32_C(4822.66)}, + { { SIMDE_FLOAT32_C(-293.68), SIMDE_FLOAT32_C(4335.15) }, + { SIMDE_FLOAT32_C(4822.66), SIMDE_FLOAT32_C(2212.82) } } }, + { { { SIMDE_FLOAT32_C(-569.07), SIMDE_FLOAT32_C(1299.98) }, + { SIMDE_FLOAT32_C(2879.02), SIMDE_FLOAT32_C(4991.35) } }, + { SIMDE_FLOAT32_C(3143.46), SIMDE_FLOAT32_C(2846.67)}, + { { SIMDE_FLOAT32_C(-569.07), SIMDE_FLOAT32_C(3143.46) }, + { SIMDE_FLOAT32_C(2879.02), SIMDE_FLOAT32_C(2846.67) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2x2_t r, src, expected; + src.val[0] = simde_vld1_f32(test_vec[i].src[0]); + src.val[1] = simde_vld1_f32(test_vec[i].src[1]); + + SIMDE_CONSTIFY_2_(simde_vld2_lane_f32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_f32(test_vec[i].r[0]); + expected.val[1] = simde_vld1_f32(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t src[2][1]; + simde_float64_t buf[2]; + simde_float64_t r[2][1]; + } test_vec[] = { + { { { SIMDE_FLOAT64_C(-463008.81) }, + { SIMDE_FLOAT64_C(-130916.96) } }, + { SIMDE_FLOAT64_C(392863.67), SIMDE_FLOAT64_C(310874.94)}, + { { SIMDE_FLOAT64_C(392863.67) }, + { SIMDE_FLOAT64_C(310874.94) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1x2_t r, src, expected; + src.val[0] = simde_vld1_f64(test_vec[i].src[0]); + src.val[1] = simde_vld1_f64(test_vec[i].src[1]); + + r = simde_vld2_lane_f64(test_vec[i].buf, src, 0); + + expected.val[0] = simde_vld1_f64(test_vec[i].r[0]); + expected.val[1] = simde_vld1_f64(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[1], 
expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2q_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t src[2][16]; + int8_t buf[2]; + int8_t r[2][16]; + } test_vec[] = { + { { { INT8_C(46), INT8_C(42), -INT8_C(1), INT8_C(9), -INT8_C(24), -INT8_C(23), -INT8_C(11), INT8_C(37), + INT8_C(22), INT8_C(30), INT8_C(25), INT8_C(27), -INT8_C(10), -INT8_C(26), -INT8_C(1), -INT8_C(10) }, + { INT8_C(13), INT8_C(29), -INT8_C(22), INT8_C(40), -INT8_C(48), -INT8_C(40), INT8_C(0), -INT8_C(12), + -INT8_C(18), -INT8_C(35), INT8_C(43), -INT8_C(30), -INT8_C(32), INT8_C(30), -INT8_C(21), -INT8_C(36) } }, + { -INT8_C(13), -INT8_C(15)}, + { { -INT8_C(13), INT8_C(42), -INT8_C(1), INT8_C(9), -INT8_C(24), -INT8_C(23), -INT8_C(11), INT8_C(37), + INT8_C(22), INT8_C(30), INT8_C(25), INT8_C(27), -INT8_C(10), -INT8_C(26), -INT8_C(1), -INT8_C(10) }, + { -INT8_C(15), INT8_C(29), -INT8_C(22), INT8_C(40), -INT8_C(48), -INT8_C(40), INT8_C(0), -INT8_C(12), + -INT8_C(18), -INT8_C(35), INT8_C(43), -INT8_C(30), -INT8_C(32), INT8_C(30), -INT8_C(21), -INT8_C(36) } } }, + { { { -INT8_C(20), -INT8_C(7), INT8_C(32), -INT8_C(38), INT8_C(1), -INT8_C(28), -INT8_C(5), -INT8_C(1), + -INT8_C(17), INT8_C(10), INT8_C(10), INT8_C(23), -INT8_C(26), INT8_C(1), -INT8_C(37), -INT8_C(35) }, + { INT8_C(18), -INT8_C(31), INT8_C(37), INT8_C(26), INT8_C(9), INT8_C(14), -INT8_C(17), INT8_C(38), + -INT8_C(8), -INT8_C(27), INT8_C(45), -INT8_C(15), INT8_C(0), INT8_C(36), INT8_C(1), -INT8_C(20) } }, + { -INT8_C(11), INT8_C(6)}, + { { -INT8_C(20), -INT8_C(11), INT8_C(32), -INT8_C(38), INT8_C(1), -INT8_C(28), -INT8_C(5), -INT8_C(1), + -INT8_C(17), INT8_C(10), INT8_C(10), INT8_C(23), -INT8_C(26), INT8_C(1), -INT8_C(37), -INT8_C(35) }, + { INT8_C(18), INT8_C(6), INT8_C(37), INT8_C(26), INT8_C(9), INT8_C(14), -INT8_C(17), INT8_C(38), + -INT8_C(8), -INT8_C(27), INT8_C(45), -INT8_C(15), INT8_C(0), INT8_C(36), INT8_C(1), -INT8_C(20) } } }, + { { { INT8_C(13), -INT8_C(26), -INT8_C(44), INT8_C(35), 
-INT8_C(42), -INT8_C(3), INT8_C(9), INT8_C(23), + INT8_C(0), -INT8_C(20), INT8_C(17), INT8_C(46), INT8_C(15), INT8_C(39), -INT8_C(26), -INT8_C(34) }, + { INT8_C(5), INT8_C(22), -INT8_C(30), INT8_C(9), INT8_C(17), -INT8_C(44), -INT8_C(46), -INT8_C(25), + -INT8_C(45), INT8_C(46), INT8_C(16), -INT8_C(6), -INT8_C(40), INT8_C(37), -INT8_C(15), -INT8_C(11) } }, + { INT8_C(25), INT8_C(25)}, + { { INT8_C(13), -INT8_C(26), INT8_C(25), INT8_C(35), -INT8_C(42), -INT8_C(3), INT8_C(9), INT8_C(23), + INT8_C(0), -INT8_C(20), INT8_C(17), INT8_C(46), INT8_C(15), INT8_C(39), -INT8_C(26), -INT8_C(34) }, + { INT8_C(5), INT8_C(22), INT8_C(25), INT8_C(9), INT8_C(17), -INT8_C(44), -INT8_C(46), -INT8_C(25), + -INT8_C(45), INT8_C(46), INT8_C(16), -INT8_C(6), -INT8_C(40), INT8_C(37), -INT8_C(15), -INT8_C(11) } } }, + { { { -INT8_C(6), -INT8_C(37), INT8_C(49), INT8_C(9), INT8_C(38), -INT8_C(40), -INT8_C(21), -INT8_C(28), + -INT8_C(2), -INT8_C(1), -INT8_C(18), INT8_C(31), -INT8_C(34), -INT8_C(2), INT8_C(23), INT8_C(35) }, + { -INT8_C(17), INT8_C(12), -INT8_C(24), -INT8_C(48), -INT8_C(30), -INT8_C(16), INT8_C(21), INT8_C(31), + INT8_C(39), INT8_C(29), INT8_C(4), -INT8_C(18), -INT8_C(47), INT8_C(15), -INT8_C(49), INT8_C(18) } }, + { -INT8_C(41), INT8_C(33)}, + { { -INT8_C(6), -INT8_C(37), INT8_C(49), -INT8_C(41), INT8_C(38), -INT8_C(40), -INT8_C(21), -INT8_C(28), + -INT8_C(2), -INT8_C(1), -INT8_C(18), INT8_C(31), -INT8_C(34), -INT8_C(2), INT8_C(23), INT8_C(35) }, + { -INT8_C(17), INT8_C(12), -INT8_C(24), INT8_C(33), -INT8_C(30), -INT8_C(16), INT8_C(21), INT8_C(31), + INT8_C(39), INT8_C(29), INT8_C(4), -INT8_C(18), -INT8_C(47), INT8_C(15), -INT8_C(49), INT8_C(18) } } }, + { { { INT8_C(10), -INT8_C(27), INT8_C(21), -INT8_C(23), -INT8_C(17), -INT8_C(42), INT8_C(24), -INT8_C(35), + -INT8_C(38), -INT8_C(3), INT8_C(29), INT8_C(41), -INT8_C(44), -INT8_C(15), -INT8_C(48), -INT8_C(26) }, + { -INT8_C(9), -INT8_C(47), -INT8_C(29), INT8_C(8), INT8_C(7), INT8_C(6), -INT8_C(26), INT8_C(9), + -INT8_C(11), 
-INT8_C(2), -INT8_C(6), INT8_C(22), INT8_C(34), -INT8_C(5), -INT8_C(38), -INT8_C(18) } }, + { INT8_C(21), -INT8_C(25)}, + { { INT8_C(10), -INT8_C(27), INT8_C(21), -INT8_C(23), INT8_C(21), -INT8_C(42), INT8_C(24), -INT8_C(35), + -INT8_C(38), -INT8_C(3), INT8_C(29), INT8_C(41), -INT8_C(44), -INT8_C(15), -INT8_C(48), -INT8_C(26) }, + { -INT8_C(9), -INT8_C(47), -INT8_C(29), INT8_C(8), -INT8_C(25), INT8_C(6), -INT8_C(26), INT8_C(9), + -INT8_C(11), -INT8_C(2), -INT8_C(6), INT8_C(22), INT8_C(34), -INT8_C(5), -INT8_C(38), -INT8_C(18) } } }, + { { { INT8_C(2), INT8_C(33), INT8_C(23), INT8_C(0), -INT8_C(48), INT8_C(2), INT8_C(26), INT8_C(29), + INT8_C(38), INT8_C(14), INT8_C(48), INT8_C(0), -INT8_C(44), -INT8_C(42), -INT8_C(37), INT8_C(5) }, + { INT8_C(24), INT8_C(4), INT8_C(37), INT8_C(46), INT8_C(40), INT8_C(24), INT8_C(40), -INT8_C(18), + -INT8_C(44), INT8_C(48), -INT8_C(36), INT8_C(45), INT8_C(2), INT8_C(4), -INT8_C(35), -INT8_C(9) } }, + { INT8_C(15), INT8_C(13)}, + { { INT8_C(2), INT8_C(33), INT8_C(23), INT8_C(0), -INT8_C(48), INT8_C(15), INT8_C(26), INT8_C(29), + INT8_C(38), INT8_C(14), INT8_C(48), INT8_C(0), -INT8_C(44), -INT8_C(42), -INT8_C(37), INT8_C(5) }, + { INT8_C(24), INT8_C(4), INT8_C(37), INT8_C(46), INT8_C(40), INT8_C(13), INT8_C(40), -INT8_C(18), + -INT8_C(44), INT8_C(48), -INT8_C(36), INT8_C(45), INT8_C(2), INT8_C(4), -INT8_C(35), -INT8_C(9) } } }, + { { { -INT8_C(19), INT8_C(26), -INT8_C(45), INT8_C(37), -INT8_C(27), -INT8_C(2), INT8_C(43), -INT8_C(37), + -INT8_C(26), -INT8_C(18), INT8_C(35), INT8_C(9), -INT8_C(15), INT8_C(38), -INT8_C(12), -INT8_C(35) }, + { INT8_C(9), -INT8_C(5), -INT8_C(6), -INT8_C(48), INT8_C(10), -INT8_C(7), -INT8_C(18), INT8_C(18), + -INT8_C(5), -INT8_C(48), -INT8_C(36), INT8_C(12), -INT8_C(8), INT8_C(23), INT8_C(0), -INT8_C(27) } }, + { -INT8_C(19), -INT8_C(38)}, + { { -INT8_C(19), INT8_C(26), -INT8_C(45), INT8_C(37), -INT8_C(27), -INT8_C(2), -INT8_C(19), -INT8_C(37), + -INT8_C(26), -INT8_C(18), INT8_C(35), INT8_C(9), -INT8_C(15), 
INT8_C(38), -INT8_C(12), -INT8_C(35) }, + { INT8_C(9), -INT8_C(5), -INT8_C(6), -INT8_C(48), INT8_C(10), -INT8_C(7), -INT8_C(38), INT8_C(18), + -INT8_C(5), -INT8_C(48), -INT8_C(36), INT8_C(12), -INT8_C(8), INT8_C(23), INT8_C(0), -INT8_C(27) } } }, + { { { INT8_C(27), INT8_C(17), -INT8_C(31), INT8_C(14), INT8_C(14), INT8_C(10), INT8_C(47), INT8_C(29), + -INT8_C(16), INT8_C(21), -INT8_C(21), -INT8_C(49), INT8_C(40), -INT8_C(7), -INT8_C(47), INT8_C(0) }, + { -INT8_C(39), -INT8_C(29), -INT8_C(1), -INT8_C(3), -INT8_C(6), INT8_C(48), INT8_C(32), -INT8_C(2), + -INT8_C(14), INT8_C(9), -INT8_C(49), -INT8_C(9), -INT8_C(7), INT8_C(24), INT8_C(42), INT8_C(39) } }, + { INT8_C(4), -INT8_C(15)}, + { { INT8_C(27), INT8_C(17), -INT8_C(31), INT8_C(14), INT8_C(14), INT8_C(10), INT8_C(47), INT8_C(4), + -INT8_C(16), INT8_C(21), -INT8_C(21), -INT8_C(49), INT8_C(40), -INT8_C(7), -INT8_C(47), INT8_C(0) }, + { -INT8_C(39), -INT8_C(29), -INT8_C(1), -INT8_C(3), -INT8_C(6), INT8_C(48), INT8_C(32), -INT8_C(15), + -INT8_C(14), INT8_C(9), -INT8_C(49), -INT8_C(9), -INT8_C(7), INT8_C(24), INT8_C(42), INT8_C(39) } } }, + { { { INT8_C(37), INT8_C(15), -INT8_C(46), INT8_C(31), -INT8_C(34), INT8_C(45), -INT8_C(14), INT8_C(4), + -INT8_C(14), INT8_C(26), INT8_C(46), INT8_C(29), INT8_C(18), INT8_C(8), -INT8_C(42), -INT8_C(46) }, + { -INT8_C(21), INT8_C(6), INT8_C(17), -INT8_C(27), -INT8_C(20), -INT8_C(32), -INT8_C(7), INT8_C(47), + -INT8_C(2), -INT8_C(20), INT8_C(26), INT8_C(18), -INT8_C(49), -INT8_C(34), -INT8_C(49), INT8_C(20) } }, + { INT8_C(35), -INT8_C(38)}, + { { INT8_C(37), INT8_C(15), -INT8_C(46), INT8_C(31), -INT8_C(34), INT8_C(45), -INT8_C(14), INT8_C(4), + INT8_C(35), INT8_C(26), INT8_C(46), INT8_C(29), INT8_C(18), INT8_C(8), -INT8_C(42), -INT8_C(46) }, + { -INT8_C(21), INT8_C(6), INT8_C(17), -INT8_C(27), -INT8_C(20), -INT8_C(32), -INT8_C(7), INT8_C(47), + -INT8_C(38), -INT8_C(20), INT8_C(26), INT8_C(18), -INT8_C(49), -INT8_C(34), -INT8_C(49), INT8_C(20) } } }, + { { { -INT8_C(3), -INT8_C(7), 
-INT8_C(11), INT8_C(42), -INT8_C(30), INT8_C(18), -INT8_C(19), INT8_C(10), + INT8_C(28), INT8_C(27), -INT8_C(40), INT8_C(8), -INT8_C(37), -INT8_C(45), -INT8_C(46), INT8_C(28) }, + { -INT8_C(20), -INT8_C(48), INT8_C(0), INT8_C(46), INT8_C(25), -INT8_C(39), INT8_C(0), -INT8_C(21), + INT8_C(25), INT8_C(28), -INT8_C(48), -INT8_C(12), -INT8_C(13), -INT8_C(44), -INT8_C(45), -INT8_C(26) } }, + { INT8_C(47), INT8_C(22)}, + { { -INT8_C(3), -INT8_C(7), -INT8_C(11), INT8_C(42), -INT8_C(30), INT8_C(18), -INT8_C(19), INT8_C(10), + INT8_C(28), INT8_C(47), -INT8_C(40), INT8_C(8), -INT8_C(37), -INT8_C(45), -INT8_C(46), INT8_C(28) }, + { -INT8_C(20), -INT8_C(48), INT8_C(0), INT8_C(46), INT8_C(25), -INT8_C(39), INT8_C(0), -INT8_C(21), + INT8_C(25), INT8_C(22), -INT8_C(48), -INT8_C(12), -INT8_C(13), -INT8_C(44), -INT8_C(45), -INT8_C(26) } } }, + { { { -INT8_C(3), INT8_C(35), -INT8_C(46), INT8_C(27), -INT8_C(13), INT8_C(31), -INT8_C(44), INT8_C(37), + INT8_C(46), -INT8_C(47), -INT8_C(29), INT8_C(24), INT8_C(0), INT8_C(31), -INT8_C(24), INT8_C(39) }, + { INT8_C(8), INT8_C(45), -INT8_C(22), -INT8_C(28), INT8_C(9), -INT8_C(19), -INT8_C(27), -INT8_C(1), + INT8_C(47), INT8_C(38), -INT8_C(11), -INT8_C(30), -INT8_C(18), INT8_C(13), -INT8_C(36), INT8_C(6) } }, + { INT8_C(27), INT8_C(45)}, + { { -INT8_C(3), INT8_C(35), -INT8_C(46), INT8_C(27), -INT8_C(13), INT8_C(31), -INT8_C(44), INT8_C(37), + INT8_C(46), -INT8_C(47), INT8_C(27), INT8_C(24), INT8_C(0), INT8_C(31), -INT8_C(24), INT8_C(39) }, + { INT8_C(8), INT8_C(45), -INT8_C(22), -INT8_C(28), INT8_C(9), -INT8_C(19), -INT8_C(27), -INT8_C(1), + INT8_C(47), INT8_C(38), INT8_C(45), -INT8_C(30), -INT8_C(18), INT8_C(13), -INT8_C(36), INT8_C(6) } } }, + { { { -INT8_C(27), INT8_C(22), -INT8_C(2), -INT8_C(45), -INT8_C(16), -INT8_C(20), -INT8_C(11), INT8_C(41), + INT8_C(5), INT8_C(39), INT8_C(35), INT8_C(0), -INT8_C(42), INT8_C(15), -INT8_C(7), -INT8_C(9) }, + { -INT8_C(42), INT8_C(1), INT8_C(5), -INT8_C(26), -INT8_C(17), -INT8_C(42), -INT8_C(12), 
INT8_C(7), + INT8_C(43), -INT8_C(27), -INT8_C(46), -INT8_C(18), -INT8_C(44), INT8_C(7), INT8_C(49), INT8_C(24) } }, + { INT8_C(14), -INT8_C(8)}, + { { -INT8_C(27), INT8_C(22), -INT8_C(2), -INT8_C(45), -INT8_C(16), -INT8_C(20), -INT8_C(11), INT8_C(41), + INT8_C(5), INT8_C(39), INT8_C(35), INT8_C(14), -INT8_C(42), INT8_C(15), -INT8_C(7), -INT8_C(9) }, + { -INT8_C(42), INT8_C(1), INT8_C(5), -INT8_C(26), -INT8_C(17), -INT8_C(42), -INT8_C(12), INT8_C(7), + INT8_C(43), -INT8_C(27), -INT8_C(46), -INT8_C(8), -INT8_C(44), INT8_C(7), INT8_C(49), INT8_C(24) } } }, + { { { INT8_C(49), -INT8_C(24), -INT8_C(24), -INT8_C(11), INT8_C(12), -INT8_C(25), INT8_C(0), INT8_C(3), + -INT8_C(10), -INT8_C(16), INT8_C(24), INT8_C(47), INT8_C(42), INT8_C(8), INT8_C(45), -INT8_C(14) }, + { INT8_C(19), INT8_C(19), INT8_C(0), -INT8_C(4), INT8_C(9), INT8_C(37), INT8_C(17), INT8_C(6), + INT8_C(16), INT8_C(49), -INT8_C(39), INT8_C(30), INT8_C(42), -INT8_C(1), INT8_C(27), INT8_C(22) } }, + { -INT8_C(14), INT8_C(8)}, + { { INT8_C(49), -INT8_C(24), -INT8_C(24), -INT8_C(11), INT8_C(12), -INT8_C(25), INT8_C(0), INT8_C(3), + -INT8_C(10), -INT8_C(16), INT8_C(24), INT8_C(47), -INT8_C(14), INT8_C(8), INT8_C(45), -INT8_C(14) }, + { INT8_C(19), INT8_C(19), INT8_C(0), -INT8_C(4), INT8_C(9), INT8_C(37), INT8_C(17), INT8_C(6), + INT8_C(16), INT8_C(49), -INT8_C(39), INT8_C(30), INT8_C(8), -INT8_C(1), INT8_C(27), INT8_C(22) } } }, + { { { -INT8_C(6), -INT8_C(22), -INT8_C(33), INT8_C(31), INT8_C(29), -INT8_C(16), INT8_C(29), INT8_C(38), + -INT8_C(25), -INT8_C(9), INT8_C(19), INT8_C(12), -INT8_C(2), -INT8_C(47), INT8_C(19), -INT8_C(47) }, + { -INT8_C(42), INT8_C(2), -INT8_C(24), INT8_C(27), -INT8_C(3), INT8_C(20), INT8_C(2), -INT8_C(20), + -INT8_C(10), -INT8_C(29), -INT8_C(37), -INT8_C(33), -INT8_C(47), -INT8_C(1), INT8_C(31), -INT8_C(4) } }, + { -INT8_C(12), -INT8_C(31)}, + { { -INT8_C(6), -INT8_C(22), -INT8_C(33), INT8_C(31), INT8_C(29), -INT8_C(16), INT8_C(29), INT8_C(38), + -INT8_C(25), -INT8_C(9), INT8_C(19), 
INT8_C(12), -INT8_C(2), -INT8_C(12), INT8_C(19), -INT8_C(47) }, + { -INT8_C(42), INT8_C(2), -INT8_C(24), INT8_C(27), -INT8_C(3), INT8_C(20), INT8_C(2), -INT8_C(20), + -INT8_C(10), -INT8_C(29), -INT8_C(37), -INT8_C(33), -INT8_C(47), -INT8_C(31), INT8_C(31), -INT8_C(4) } } }, + { { { -INT8_C(4), -INT8_C(34), -INT8_C(11), -INT8_C(37), -INT8_C(11), INT8_C(18), INT8_C(0), INT8_C(13), + -INT8_C(45), INT8_C(20), -INT8_C(3), INT8_C(34), -INT8_C(42), -INT8_C(2), INT8_C(17), INT8_C(0) }, + { INT8_C(43), -INT8_C(15), INT8_C(29), -INT8_C(47), INT8_C(29), INT8_C(31), INT8_C(20), INT8_C(42), + INT8_C(37), INT8_C(6), -INT8_C(39), -INT8_C(2), INT8_C(40), -INT8_C(34), INT8_C(46), INT8_C(21) } }, + { -INT8_C(1), -INT8_C(27)}, + { { -INT8_C(4), -INT8_C(34), -INT8_C(11), -INT8_C(37), -INT8_C(11), INT8_C(18), INT8_C(0), INT8_C(13), + -INT8_C(45), INT8_C(20), -INT8_C(3), INT8_C(34), -INT8_C(42), -INT8_C(2), -INT8_C(1), INT8_C(0) }, + { INT8_C(43), -INT8_C(15), INT8_C(29), -INT8_C(47), INT8_C(29), INT8_C(31), INT8_C(20), INT8_C(42), + INT8_C(37), INT8_C(6), -INT8_C(39), -INT8_C(2), INT8_C(40), -INT8_C(34), -INT8_C(27), INT8_C(21) } } }, + { { { -INT8_C(45), INT8_C(24), INT8_C(18), INT8_C(2), -INT8_C(29), INT8_C(24), -INT8_C(25), INT8_C(8), + INT8_C(2), INT8_C(10), INT8_C(15), -INT8_C(30), INT8_C(16), INT8_C(4), INT8_C(46), -INT8_C(26) }, + { -INT8_C(24), -INT8_C(33), INT8_C(14), INT8_C(42), INT8_C(12), INT8_C(0), INT8_C(14), INT8_C(30), + -INT8_C(11), -INT8_C(3), INT8_C(46), -INT8_C(16), -INT8_C(32), -INT8_C(30), -INT8_C(15), INT8_C(0) } }, + { -INT8_C(18), -INT8_C(19)}, + { { -INT8_C(45), INT8_C(24), INT8_C(18), INT8_C(2), -INT8_C(29), INT8_C(24), -INT8_C(25), INT8_C(8), + INT8_C(2), INT8_C(10), INT8_C(15), -INT8_C(30), INT8_C(16), INT8_C(4), INT8_C(46), -INT8_C(18) }, + { -INT8_C(24), -INT8_C(33), INT8_C(14), INT8_C(42), INT8_C(12), INT8_C(0), INT8_C(14), INT8_C(30), + -INT8_C(11), -INT8_C(3), INT8_C(46), -INT8_C(16), -INT8_C(32), -INT8_C(30), -INT8_C(15), -INT8_C(19) } } }, + }; + + 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16x2_t r, src, expected; + src.val[0] = simde_vld1q_s8(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s8(test_vec[i].src[1]); + + SIMDE_CONSTIFY_16_(simde_vld2q_lane_s8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s8(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s8(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t src[2][8]; + int16_t buf[2]; + int16_t r[2][8]; + } test_vec[] = { + { { { INT16_C(418), INT16_C(216), INT16_C(347), INT16_C(476), + -INT16_C(360), INT16_C(143), -INT16_C(316), -INT16_C(114) }, + { INT16_C(387), -INT16_C(146), INT16_C(225), -INT16_C(190), + INT16_C(399), -INT16_C(128), INT16_C(400), INT16_C(257) } }, + { INT16_C(488), -INT16_C(232)}, + { { INT16_C(488), INT16_C(216), INT16_C(347), INT16_C(476), + -INT16_C(360), INT16_C(143), -INT16_C(316), -INT16_C(114) }, + { -INT16_C(232), -INT16_C(146), INT16_C(225), -INT16_C(190), + INT16_C(399), -INT16_C(128), INT16_C(400), INT16_C(257) } } }, + { { { -INT16_C(311), INT16_C(430), -INT16_C(36), INT16_C(399), + -INT16_C(84), INT16_C(165), INT16_C(434), -INT16_C(105) }, + { -INT16_C(449), INT16_C(455), INT16_C(390), -INT16_C(160), + INT16_C(77), INT16_C(147), INT16_C(38), INT16_C(271) } }, + { INT16_C(106), -INT16_C(427)}, + { { -INT16_C(311), INT16_C(106), -INT16_C(36), INT16_C(399), + -INT16_C(84), INT16_C(165), INT16_C(434), -INT16_C(105) }, + { -INT16_C(449), -INT16_C(427), INT16_C(390), -INT16_C(160), + INT16_C(77), INT16_C(147), INT16_C(38), INT16_C(271) } } }, + { { { INT16_C(181), -INT16_C(136), INT16_C(379), INT16_C(296), + -INT16_C(309), -INT16_C(435), -INT16_C(152), INT16_C(215) }, + { -INT16_C(379), INT16_C(147), INT16_C(345), 
-INT16_C(276), + INT16_C(105), -INT16_C(142), -INT16_C(485), -INT16_C(383) } }, + { -INT16_C(39), -INT16_C(212)}, + { { INT16_C(181), -INT16_C(136), -INT16_C(39), INT16_C(296), + -INT16_C(309), -INT16_C(435), -INT16_C(152), INT16_C(215) }, + { -INT16_C(379), INT16_C(147), -INT16_C(212), -INT16_C(276), + INT16_C(105), -INT16_C(142), -INT16_C(485), -INT16_C(383) } } }, + { { { INT16_C(181), -INT16_C(451), INT16_C(74), -INT16_C(26), + INT16_C(435), -INT16_C(466), INT16_C(458), -INT16_C(378) }, + { -INT16_C(292), -INT16_C(173), INT16_C(470), INT16_C(400), + -INT16_C(286), INT16_C(269), INT16_C(383), -INT16_C(317) } }, + { INT16_C(240), INT16_C(459)}, + { { INT16_C(181), -INT16_C(451), INT16_C(74), INT16_C(240), + INT16_C(435), -INT16_C(466), INT16_C(458), -INT16_C(378) }, + { -INT16_C(292), -INT16_C(173), INT16_C(470), INT16_C(459), + -INT16_C(286), INT16_C(269), INT16_C(383), -INT16_C(317) } } }, + { { { -INT16_C(377), -INT16_C(162), -INT16_C(3), -INT16_C(316), + -INT16_C(150), INT16_C(261), INT16_C(40), INT16_C(64) }, + { INT16_C(243), INT16_C(47), -INT16_C(381), -INT16_C(58), + INT16_C(290), INT16_C(64), -INT16_C(220), -INT16_C(89) } }, + { INT16_C(378), INT16_C(355)}, + { { -INT16_C(377), -INT16_C(162), -INT16_C(3), -INT16_C(316), + INT16_C(378), INT16_C(261), INT16_C(40), INT16_C(64) }, + { INT16_C(243), INT16_C(47), -INT16_C(381), -INT16_C(58), + INT16_C(355), INT16_C(64), -INT16_C(220), -INT16_C(89) } } }, + { { { -INT16_C(298), INT16_C(486), -INT16_C(96), INT16_C(132), + -INT16_C(108), INT16_C(284), INT16_C(54), INT16_C(224) }, + { INT16_C(249), -INT16_C(117), -INT16_C(26), -INT16_C(467), + -INT16_C(477), -INT16_C(25), INT16_C(300), -INT16_C(233) } }, + { -INT16_C(454), INT16_C(179)}, + { { -INT16_C(298), INT16_C(486), -INT16_C(96), INT16_C(132), + -INT16_C(108), -INT16_C(454), INT16_C(54), INT16_C(224) }, + { INT16_C(249), -INT16_C(117), -INT16_C(26), -INT16_C(467), + -INT16_C(477), INT16_C(179), INT16_C(300), -INT16_C(233) } } }, + { { { -INT16_C(15), 
-INT16_C(88), -INT16_C(166), INT16_C(374), + INT16_C(203), -INT16_C(81), INT16_C(316), -INT16_C(214) }, + { -INT16_C(227), INT16_C(24), -INT16_C(14), -INT16_C(100), + -INT16_C(31), INT16_C(86), INT16_C(474), -INT16_C(470) } }, + { INT16_C(325), -INT16_C(388)}, + { { -INT16_C(15), -INT16_C(88), -INT16_C(166), INT16_C(374), + INT16_C(203), -INT16_C(81), INT16_C(325), -INT16_C(214) }, + { -INT16_C(227), INT16_C(24), -INT16_C(14), -INT16_C(100), + -INT16_C(31), INT16_C(86), -INT16_C(388), -INT16_C(470) } } }, + { { { INT16_C(370), -INT16_C(25), -INT16_C(181), INT16_C(342), + INT16_C(6), -INT16_C(89), -INT16_C(30), INT16_C(425) }, + { -INT16_C(478), -INT16_C(331), INT16_C(463), INT16_C(400), + -INT16_C(469), -INT16_C(40), INT16_C(46), -INT16_C(134) } }, + { INT16_C(225), -INT16_C(7)}, + { { INT16_C(370), -INT16_C(25), -INT16_C(181), INT16_C(342), + INT16_C(6), -INT16_C(89), -INT16_C(30), INT16_C(225) }, + { -INT16_C(478), -INT16_C(331), INT16_C(463), INT16_C(400), + -INT16_C(469), -INT16_C(40), INT16_C(46), -INT16_C(7) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8x2_t r, src, expected; + src.val[0] = simde_vld1q_s16(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s16(test_vec[i].src[1]); + + SIMDE_CONSTIFY_8_(simde_vld2q_lane_s16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s16(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s16(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t src[2][4]; + int32_t buf[2]; + int32_t r[2][4]; + } test_vec[] = { + { { { -INT32_C(224), -INT32_C(24), INT32_C(2014), INT32_C(3335) }, + { -INT32_C(4141), INT32_C(3456), -INT32_C(3311), -INT32_C(746) } }, + { -INT32_C(2149), -INT32_C(82)}, + { { -INT32_C(2149), 
-INT32_C(24), INT32_C(2014), INT32_C(3335) }, + { -INT32_C(82), INT32_C(3456), -INT32_C(3311), -INT32_C(746) } } }, + { { { -INT32_C(2915), -INT32_C(4196), INT32_C(1917), -INT32_C(77) }, + { -INT32_C(506), -INT32_C(4630), -INT32_C(894), INT32_C(1062) } }, + { INT32_C(121), -INT32_C(4389)}, + { { -INT32_C(2915), INT32_C(121), INT32_C(1917), -INT32_C(77) }, + { -INT32_C(506), -INT32_C(4389), -INT32_C(894), INT32_C(1062) } } }, + { { { -INT32_C(2980), INT32_C(3337), -INT32_C(2109), -INT32_C(1551) }, + { INT32_C(3126), -INT32_C(3966), INT32_C(686), INT32_C(3121) } }, + { INT32_C(3921), -INT32_C(4929)}, + { { -INT32_C(2980), INT32_C(3337), INT32_C(3921), -INT32_C(1551) }, + { INT32_C(3126), -INT32_C(3966), -INT32_C(4929), INT32_C(3121) } } }, + { { { -INT32_C(4755), INT32_C(2232), INT32_C(2970), -INT32_C(272) }, + { INT32_C(492), -INT32_C(4034), -INT32_C(412), INT32_C(1495) } }, + { INT32_C(4153), INT32_C(2365)}, + { { -INT32_C(4755), INT32_C(2232), INT32_C(2970), INT32_C(4153) }, + { INT32_C(492), -INT32_C(4034), -INT32_C(412), INT32_C(2365) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4x2_t r, src, expected; + src.val[0] = simde_vld1q_s32(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s32(test_vec[i].src[1]); + + SIMDE_CONSTIFY_4_(simde_vld2q_lane_s32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s32(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s32(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); + } + return 0; +} + +static int +test_simde_vld2q_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t src[2][2]; + int64_t buf[2]; + int64_t r[2][2]; + } test_vec[] = { + { { { INT64_C(133259958), INT64_C(495309638) }, + { INT64_C(463332514), INT64_C(273508271) } }, + { INT64_C(159685885), -INT64_C(187025881)}, + { { INT64_C(159685885), 
INT64_C(495309638) }, + { -INT64_C(187025881), INT64_C(273508271) } } }, + { { { -INT64_C(430109367), -INT64_C(402602870) }, + { -INT64_C(309788205), -INT64_C(441710399) } }, + { -INT64_C(264334842), -INT64_C(60309831)}, + { { -INT64_C(430109367), -INT64_C(264334842) }, + { -INT64_C(309788205), -INT64_C(60309831) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2x2_t r, src, expected; + src.val[0] = simde_vld1q_s64(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s64(test_vec[i].src[1]); + + SIMDE_CONSTIFY_2_(simde_vld2q_lane_s64, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s64(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s64(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t src[2][16]; + uint8_t buf[2]; + uint8_t r[2][16]; + } test_vec[] = { + { { { UINT8_C(46), UINT8_C(42), UINT8_C(1), UINT8_C(9), UINT8_C(24), UINT8_C(23), UINT8_C(11), UINT8_C(37), + UINT8_C(22), UINT8_C(30), UINT8_C(25), UINT8_C(27), UINT8_C(10), UINT8_C(26), UINT8_C(1), UINT8_C(10) }, + { UINT8_C(13), UINT8_C(29), UINT8_C(22), UINT8_C(40), UINT8_C(48), UINT8_C(40), UINT8_C(0), UINT8_C(12), + UINT8_C(18), UINT8_C(35), UINT8_C(43), UINT8_C(30), UINT8_C(32), UINT8_C(30), UINT8_C(21), UINT8_C(36) } }, + { UINT8_C(13), UINT8_C(15)}, + { { UINT8_C(13), UINT8_C(42), UINT8_C(1), UINT8_C(9), UINT8_C(24), UINT8_C(23), UINT8_C(11), UINT8_C(37), + UINT8_C(22), UINT8_C(30), UINT8_C(25), UINT8_C(27), UINT8_C(10), UINT8_C(26), UINT8_C(1), UINT8_C(10) }, + { UINT8_C(15), UINT8_C(29), UINT8_C(22), UINT8_C(40), UINT8_C(48), UINT8_C(40), UINT8_C(0), UINT8_C(12), + UINT8_C(18), UINT8_C(35), UINT8_C(43), UINT8_C(30), UINT8_C(32), UINT8_C(30), UINT8_C(21), UINT8_C(36) } } }, + { { { 
UINT8_C(20), UINT8_C(7), UINT8_C(32), UINT8_C(38), UINT8_C(1), UINT8_C(28), UINT8_C(5), UINT8_C(1), + UINT8_C(17), UINT8_C(10), UINT8_C(10), UINT8_C(23), UINT8_C(26), UINT8_C(1), UINT8_C(37), UINT8_C(35) }, + { UINT8_C(18), UINT8_C(31), UINT8_C(37), UINT8_C(26), UINT8_C(9), UINT8_C(14), UINT8_C(17), UINT8_C(38), + UINT8_C(8), UINT8_C(27), UINT8_C(45), UINT8_C(15), UINT8_C(0), UINT8_C(36), UINT8_C(1), UINT8_C(20) } }, + { UINT8_C(11), UINT8_C(6)}, + { { UINT8_C(20), UINT8_C(11), UINT8_C(32), UINT8_C(38), UINT8_C(1), UINT8_C(28), UINT8_C(5), UINT8_C(1), + UINT8_C(17), UINT8_C(10), UINT8_C(10), UINT8_C(23), UINT8_C(26), UINT8_C(1), UINT8_C(37), UINT8_C(35) }, + { UINT8_C(18), UINT8_C(6), UINT8_C(37), UINT8_C(26), UINT8_C(9), UINT8_C(14), UINT8_C(17), UINT8_C(38), + UINT8_C(8), UINT8_C(27), UINT8_C(45), UINT8_C(15), UINT8_C(0), UINT8_C(36), UINT8_C(1), UINT8_C(20) } } }, + { { { UINT8_C(13), UINT8_C(26), UINT8_C(44), UINT8_C(35), UINT8_C(42), UINT8_C(3), UINT8_C(9), UINT8_C(23), + UINT8_C(0), UINT8_C(20), UINT8_C(17), UINT8_C(46), UINT8_C(15), UINT8_C(39), UINT8_C(26), UINT8_C(34) }, + { UINT8_C(5), UINT8_C(22), UINT8_C(30), UINT8_C(9), UINT8_C(17), UINT8_C(44), UINT8_C(46), UINT8_C(25), + UINT8_C(45), UINT8_C(46), UINT8_C(16), UINT8_C(6), UINT8_C(40), UINT8_C(37), UINT8_C(15), UINT8_C(11) } }, + { UINT8_C(25), UINT8_C(25)}, + { { UINT8_C(13), UINT8_C(26), UINT8_C(25), UINT8_C(35), UINT8_C(42), UINT8_C(3), UINT8_C(9), UINT8_C(23), + UINT8_C(0), UINT8_C(20), UINT8_C(17), UINT8_C(46), UINT8_C(15), UINT8_C(39), UINT8_C(26), UINT8_C(34) }, + { UINT8_C(5), UINT8_C(22), UINT8_C(25), UINT8_C(9), UINT8_C(17), UINT8_C(44), UINT8_C(46), UINT8_C(25), + UINT8_C(45), UINT8_C(46), UINT8_C(16), UINT8_C(6), UINT8_C(40), UINT8_C(37), UINT8_C(15), UINT8_C(11) } } }, + { { { UINT8_C(6), UINT8_C(37), UINT8_C(49), UINT8_C(9), UINT8_C(38), UINT8_C(40), UINT8_C(21), UINT8_C(28), + UINT8_C(2), UINT8_C(1), UINT8_C(18), UINT8_C(31), UINT8_C(34), UINT8_C(2), UINT8_C(23), UINT8_C(35) }, + { 
UINT8_C(17), UINT8_C(12), UINT8_C(24), UINT8_C(48), UINT8_C(30), UINT8_C(16), UINT8_C(21), UINT8_C(31), + UINT8_C(39), UINT8_C(29), UINT8_C(4), UINT8_C(18), UINT8_C(47), UINT8_C(15), UINT8_C(49), UINT8_C(18) } }, + { UINT8_C(41), UINT8_C(33)}, + { { UINT8_C(6), UINT8_C(37), UINT8_C(49), UINT8_C(41), UINT8_C(38), UINT8_C(40), UINT8_C(21), UINT8_C(28), + UINT8_C(2), UINT8_C(1), UINT8_C(18), UINT8_C(31), UINT8_C(34), UINT8_C(2), UINT8_C(23), UINT8_C(35) }, + { UINT8_C(17), UINT8_C(12), UINT8_C(24), UINT8_C(33), UINT8_C(30), UINT8_C(16), UINT8_C(21), UINT8_C(31), + UINT8_C(39), UINT8_C(29), UINT8_C(4), UINT8_C(18), UINT8_C(47), UINT8_C(15), UINT8_C(49), UINT8_C(18) } } }, + { { { UINT8_C(10), UINT8_C(27), UINT8_C(21), UINT8_C(23), UINT8_C(17), UINT8_C(42), UINT8_C(24), UINT8_C(35), + UINT8_C(38), UINT8_C(3), UINT8_C(29), UINT8_C(41), UINT8_C(44), UINT8_C(15), UINT8_C(48), UINT8_C(26) }, + { UINT8_C(9), UINT8_C(47), UINT8_C(29), UINT8_C(8), UINT8_C(7), UINT8_C(6), UINT8_C(26), UINT8_C(9), + UINT8_C(11), UINT8_C(2), UINT8_C(6), UINT8_C(22), UINT8_C(34), UINT8_C(5), UINT8_C(38), UINT8_C(18) } }, + { UINT8_C(21), UINT8_C(25)}, + { { UINT8_C(10), UINT8_C(27), UINT8_C(21), UINT8_C(23), UINT8_C(21), UINT8_C(42), UINT8_C(24), UINT8_C(35), + UINT8_C(38), UINT8_C(3), UINT8_C(29), UINT8_C(41), UINT8_C(44), UINT8_C(15), UINT8_C(48), UINT8_C(26) }, + { UINT8_C(9), UINT8_C(47), UINT8_C(29), UINT8_C(8), UINT8_C(25), UINT8_C(6), UINT8_C(26), UINT8_C(9), + UINT8_C(11), UINT8_C(2), UINT8_C(6), UINT8_C(22), UINT8_C(34), UINT8_C(5), UINT8_C(38), UINT8_C(18) } } }, + { { { UINT8_C(2), UINT8_C(33), UINT8_C(23), UINT8_C(0), UINT8_C(48), UINT8_C(2), UINT8_C(26), UINT8_C(29), + UINT8_C(38), UINT8_C(14), UINT8_C(48), UINT8_C(0), UINT8_C(44), UINT8_C(42), UINT8_C(37), UINT8_C(5) }, + { UINT8_C(24), UINT8_C(4), UINT8_C(37), UINT8_C(46), UINT8_C(40), UINT8_C(24), UINT8_C(40), UINT8_C(18), + UINT8_C(44), UINT8_C(48), UINT8_C(36), UINT8_C(45), UINT8_C(2), UINT8_C(4), UINT8_C(35), UINT8_C(9) } }, + { 
UINT8_C(15), UINT8_C(13)}, + { { UINT8_C(2), UINT8_C(33), UINT8_C(23), UINT8_C(0), UINT8_C(48), UINT8_C(15), UINT8_C(26), UINT8_C(29), + UINT8_C(38), UINT8_C(14), UINT8_C(48), UINT8_C(0), UINT8_C(44), UINT8_C(42), UINT8_C(37), UINT8_C(5) }, + { UINT8_C(24), UINT8_C(4), UINT8_C(37), UINT8_C(46), UINT8_C(40), UINT8_C(13), UINT8_C(40), UINT8_C(18), + UINT8_C(44), UINT8_C(48), UINT8_C(36), UINT8_C(45), UINT8_C(2), UINT8_C(4), UINT8_C(35), UINT8_C(9) } } }, + { { { UINT8_C(19), UINT8_C(26), UINT8_C(45), UINT8_C(37), UINT8_C(27), UINT8_C(2), UINT8_C(43), UINT8_C(37), + UINT8_C(26), UINT8_C(18), UINT8_C(35), UINT8_C(9), UINT8_C(15), UINT8_C(38), UINT8_C(12), UINT8_C(35) }, + { UINT8_C(9), UINT8_C(5), UINT8_C(6), UINT8_C(48), UINT8_C(10), UINT8_C(7), UINT8_C(18), UINT8_C(18), + UINT8_C(5), UINT8_C(48), UINT8_C(36), UINT8_C(12), UINT8_C(8), UINT8_C(23), UINT8_C(0), UINT8_C(27) } }, + { UINT8_C(19), UINT8_C(38)}, + { { UINT8_C(19), UINT8_C(26), UINT8_C(45), UINT8_C(37), UINT8_C(27), UINT8_C(2), UINT8_C(19), UINT8_C(37), + UINT8_C(26), UINT8_C(18), UINT8_C(35), UINT8_C(9), UINT8_C(15), UINT8_C(38), UINT8_C(12), UINT8_C(35) }, + { UINT8_C(9), UINT8_C(5), UINT8_C(6), UINT8_C(48), UINT8_C(10), UINT8_C(7), UINT8_C(38), UINT8_C(18), + UINT8_C(5), UINT8_C(48), UINT8_C(36), UINT8_C(12), UINT8_C(8), UINT8_C(23), UINT8_C(0), UINT8_C(27) } } }, + { { { UINT8_C(27), UINT8_C(17), UINT8_C(31), UINT8_C(14), UINT8_C(14), UINT8_C(10), UINT8_C(47), UINT8_C(29), + UINT8_C(16), UINT8_C(21), UINT8_C(21), UINT8_C(49), UINT8_C(40), UINT8_C(7), UINT8_C(47), UINT8_C(0) }, + { UINT8_C(39), UINT8_C(29), UINT8_C(1), UINT8_C(3), UINT8_C(6), UINT8_C(48), UINT8_C(32), UINT8_C(2), + UINT8_C(14), UINT8_C(9), UINT8_C(49), UINT8_C(9), UINT8_C(7), UINT8_C(24), UINT8_C(42), UINT8_C(39) } }, + { UINT8_C(4), UINT8_C(15)}, + { { UINT8_C(27), UINT8_C(17), UINT8_C(31), UINT8_C(14), UINT8_C(14), UINT8_C(10), UINT8_C(47), UINT8_C(4), + UINT8_C(16), UINT8_C(21), UINT8_C(21), UINT8_C(49), UINT8_C(40), UINT8_C(7), 
UINT8_C(47), UINT8_C(0) }, + { UINT8_C(39), UINT8_C(29), UINT8_C(1), UINT8_C(3), UINT8_C(6), UINT8_C(48), UINT8_C(32), UINT8_C(15), + UINT8_C(14), UINT8_C(9), UINT8_C(49), UINT8_C(9), UINT8_C(7), UINT8_C(24), UINT8_C(42), UINT8_C(39) } } }, + { { { UINT8_C(37), UINT8_C(15), UINT8_C(46), UINT8_C(31), UINT8_C(34), UINT8_C(45), UINT8_C(14), UINT8_C(4), + UINT8_C(14), UINT8_C(26), UINT8_C(46), UINT8_C(29), UINT8_C(18), UINT8_C(8), UINT8_C(42), UINT8_C(46) }, + { UINT8_C(21), UINT8_C(6), UINT8_C(17), UINT8_C(27), UINT8_C(20), UINT8_C(32), UINT8_C(7), UINT8_C(47), + UINT8_C(2), UINT8_C(20), UINT8_C(26), UINT8_C(18), UINT8_C(49), UINT8_C(34), UINT8_C(49), UINT8_C(20) } }, + { UINT8_C(35), UINT8_C(38)}, + { { UINT8_C(37), UINT8_C(15), UINT8_C(46), UINT8_C(31), UINT8_C(34), UINT8_C(45), UINT8_C(14), UINT8_C(4), + UINT8_C(35), UINT8_C(26), UINT8_C(46), UINT8_C(29), UINT8_C(18), UINT8_C(8), UINT8_C(42), UINT8_C(46) }, + { UINT8_C(21), UINT8_C(6), UINT8_C(17), UINT8_C(27), UINT8_C(20), UINT8_C(32), UINT8_C(7), UINT8_C(47), + UINT8_C(38), UINT8_C(20), UINT8_C(26), UINT8_C(18), UINT8_C(49), UINT8_C(34), UINT8_C(49), UINT8_C(20) } } }, + { { { UINT8_C(3), UINT8_C(7), UINT8_C(11), UINT8_C(42), UINT8_C(30), UINT8_C(18), UINT8_C(19), UINT8_C(10), + UINT8_C(28), UINT8_C(27), UINT8_C(40), UINT8_C(8), UINT8_C(37), UINT8_C(45), UINT8_C(46), UINT8_C(28) }, + { UINT8_C(20), UINT8_C(48), UINT8_C(0), UINT8_C(46), UINT8_C(25), UINT8_C(39), UINT8_C(0), UINT8_C(21), + UINT8_C(25), UINT8_C(28), UINT8_C(48), UINT8_C(12), UINT8_C(13), UINT8_C(44), UINT8_C(45), UINT8_C(26) } }, + { UINT8_C(47), UINT8_C(22)}, + { { UINT8_C(3), UINT8_C(7), UINT8_C(11), UINT8_C(42), UINT8_C(30), UINT8_C(18), UINT8_C(19), UINT8_C(10), + UINT8_C(28), UINT8_C(47), UINT8_C(40), UINT8_C(8), UINT8_C(37), UINT8_C(45), UINT8_C(46), UINT8_C(28) }, + { UINT8_C(20), UINT8_C(48), UINT8_C(0), UINT8_C(46), UINT8_C(25), UINT8_C(39), UINT8_C(0), UINT8_C(21), + UINT8_C(25), UINT8_C(22), UINT8_C(48), UINT8_C(12), UINT8_C(13), 
UINT8_C(44), UINT8_C(45), UINT8_C(26) } } }, + { { { UINT8_C(3), UINT8_C(35), UINT8_C(46), UINT8_C(27), UINT8_C(13), UINT8_C(31), UINT8_C(44), UINT8_C(37), + UINT8_C(46), UINT8_C(47), UINT8_C(29), UINT8_C(24), UINT8_C(0), UINT8_C(31), UINT8_C(24), UINT8_C(39) }, + { UINT8_C(8), UINT8_C(45), UINT8_C(22), UINT8_C(28), UINT8_C(9), UINT8_C(19), UINT8_C(27), UINT8_C(1), + UINT8_C(47), UINT8_C(38), UINT8_C(11), UINT8_C(30), UINT8_C(18), UINT8_C(13), UINT8_C(36), UINT8_C(6) } }, + { UINT8_C(27), UINT8_C(45)}, + { { UINT8_C(3), UINT8_C(35), UINT8_C(46), UINT8_C(27), UINT8_C(13), UINT8_C(31), UINT8_C(44), UINT8_C(37), + UINT8_C(46), UINT8_C(47), UINT8_C(27), UINT8_C(24), UINT8_C(0), UINT8_C(31), UINT8_C(24), UINT8_C(39) }, + { UINT8_C(8), UINT8_C(45), UINT8_C(22), UINT8_C(28), UINT8_C(9), UINT8_C(19), UINT8_C(27), UINT8_C(1), + UINT8_C(47), UINT8_C(38), UINT8_C(45), UINT8_C(30), UINT8_C(18), UINT8_C(13), UINT8_C(36), UINT8_C(6) } } }, + { { { UINT8_C(27), UINT8_C(22), UINT8_C(2), UINT8_C(45), UINT8_C(16), UINT8_C(20), UINT8_C(11), UINT8_C(41), + UINT8_C(5), UINT8_C(39), UINT8_C(35), UINT8_C(0), UINT8_C(42), UINT8_C(15), UINT8_C(7), UINT8_C(9) }, + { UINT8_C(42), UINT8_C(1), UINT8_C(5), UINT8_C(26), UINT8_C(17), UINT8_C(42), UINT8_C(12), UINT8_C(7), + UINT8_C(43), UINT8_C(27), UINT8_C(46), UINT8_C(18), UINT8_C(44), UINT8_C(7), UINT8_C(49), UINT8_C(24) } }, + { UINT8_C(14), UINT8_C(8)}, + { { UINT8_C(27), UINT8_C(22), UINT8_C(2), UINT8_C(45), UINT8_C(16), UINT8_C(20), UINT8_C(11), UINT8_C(41), + UINT8_C(5), UINT8_C(39), UINT8_C(35), UINT8_C(14), UINT8_C(42), UINT8_C(15), UINT8_C(7), UINT8_C(9) }, + { UINT8_C(42), UINT8_C(1), UINT8_C(5), UINT8_C(26), UINT8_C(17), UINT8_C(42), UINT8_C(12), UINT8_C(7), + UINT8_C(43), UINT8_C(27), UINT8_C(46), UINT8_C(8), UINT8_C(44), UINT8_C(7), UINT8_C(49), UINT8_C(24) } } }, + { { { UINT8_C(49), UINT8_C(24), UINT8_C(24), UINT8_C(11), UINT8_C(12), UINT8_C(25), UINT8_C(0), UINT8_C(3), + UINT8_C(10), UINT8_C(16), UINT8_C(24), UINT8_C(47), 
UINT8_C(42), UINT8_C(8), UINT8_C(45), UINT8_C(14) }, + { UINT8_C(19), UINT8_C(19), UINT8_C(0), UINT8_C(4), UINT8_C(9), UINT8_C(37), UINT8_C(17), UINT8_C(6), + UINT8_C(16), UINT8_C(49), UINT8_C(39), UINT8_C(30), UINT8_C(42), UINT8_C(1), UINT8_C(27), UINT8_C(22) } }, + { UINT8_C(14), UINT8_C(8)}, + { { UINT8_C(49), UINT8_C(24), UINT8_C(24), UINT8_C(11), UINT8_C(12), UINT8_C(25), UINT8_C(0), UINT8_C(3), + UINT8_C(10), UINT8_C(16), UINT8_C(24), UINT8_C(47), UINT8_C(14), UINT8_C(8), UINT8_C(45), UINT8_C(14) }, + { UINT8_C(19), UINT8_C(19), UINT8_C(0), UINT8_C(4), UINT8_C(9), UINT8_C(37), UINT8_C(17), UINT8_C(6), + UINT8_C(16), UINT8_C(49), UINT8_C(39), UINT8_C(30), UINT8_C(8), UINT8_C(1), UINT8_C(27), UINT8_C(22) } } }, + { { { UINT8_C(6), UINT8_C(22), UINT8_C(33), UINT8_C(31), UINT8_C(29), UINT8_C(16), UINT8_C(29), UINT8_C(38), + UINT8_C(25), UINT8_C(9), UINT8_C(19), UINT8_C(12), UINT8_C(2), UINT8_C(47), UINT8_C(19), UINT8_C(47) }, + { UINT8_C(42), UINT8_C(2), UINT8_C(24), UINT8_C(27), UINT8_C(3), UINT8_C(20), UINT8_C(2), UINT8_C(20), + UINT8_C(10), UINT8_C(29), UINT8_C(37), UINT8_C(33), UINT8_C(47), UINT8_C(1), UINT8_C(31), UINT8_C(4) } }, + { UINT8_C(12), UINT8_C(31)}, + { { UINT8_C(6), UINT8_C(22), UINT8_C(33), UINT8_C(31), UINT8_C(29), UINT8_C(16), UINT8_C(29), UINT8_C(38), + UINT8_C(25), UINT8_C(9), UINT8_C(19), UINT8_C(12), UINT8_C(2), UINT8_C(12), UINT8_C(19), UINT8_C(47) }, + { UINT8_C(42), UINT8_C(2), UINT8_C(24), UINT8_C(27), UINT8_C(3), UINT8_C(20), UINT8_C(2), UINT8_C(20), + UINT8_C(10), UINT8_C(29), UINT8_C(37), UINT8_C(33), UINT8_C(47), UINT8_C(31), UINT8_C(31), UINT8_C(4) } } }, + { { { UINT8_C(4), UINT8_C(34), UINT8_C(11), UINT8_C(37), UINT8_C(11), UINT8_C(18), UINT8_C(0), UINT8_C(13), + UINT8_C(45), UINT8_C(20), UINT8_C(3), UINT8_C(34), UINT8_C(42), UINT8_C(2), UINT8_C(17), UINT8_C(0) }, + { UINT8_C(43), UINT8_C(15), UINT8_C(29), UINT8_C(47), UINT8_C(29), UINT8_C(31), UINT8_C(20), UINT8_C(42), + UINT8_C(37), UINT8_C(6), UINT8_C(39), UINT8_C(2), 
UINT8_C(40), UINT8_C(34), UINT8_C(46), UINT8_C(21) } }, + { UINT8_C(1), UINT8_C(27)}, + { { UINT8_C(4), UINT8_C(34), UINT8_C(11), UINT8_C(37), UINT8_C(11), UINT8_C(18), UINT8_C(0), UINT8_C(13), + UINT8_C(45), UINT8_C(20), UINT8_C(3), UINT8_C(34), UINT8_C(42), UINT8_C(2), UINT8_C(1), UINT8_C(0) }, + { UINT8_C(43), UINT8_C(15), UINT8_C(29), UINT8_C(47), UINT8_C(29), UINT8_C(31), UINT8_C(20), UINT8_C(42), + UINT8_C(37), UINT8_C(6), UINT8_C(39), UINT8_C(2), UINT8_C(40), UINT8_C(34), UINT8_C(27), UINT8_C(21) } } }, + { { { UINT8_C(45), UINT8_C(24), UINT8_C(18), UINT8_C(2), UINT8_C(29), UINT8_C(24), UINT8_C(25), UINT8_C(8), + UINT8_C(2), UINT8_C(10), UINT8_C(15), UINT8_C(30), UINT8_C(16), UINT8_C(4), UINT8_C(46), UINT8_C(26) }, + { UINT8_C(24), UINT8_C(33), UINT8_C(14), UINT8_C(42), UINT8_C(12), UINT8_C(0), UINT8_C(14), UINT8_C(30), + UINT8_C(11), UINT8_C(3), UINT8_C(46), UINT8_C(16), UINT8_C(32), UINT8_C(30), UINT8_C(15), UINT8_C(0) } }, + { UINT8_C(18), UINT8_C(19)}, + { { UINT8_C(45), UINT8_C(24), UINT8_C(18), UINT8_C(2), UINT8_C(29), UINT8_C(24), UINT8_C(25), UINT8_C(8), + UINT8_C(2), UINT8_C(10), UINT8_C(15), UINT8_C(30), UINT8_C(16), UINT8_C(4), UINT8_C(46), UINT8_C(18) }, + { UINT8_C(24), UINT8_C(33), UINT8_C(14), UINT8_C(42), UINT8_C(12), UINT8_C(0), UINT8_C(14), UINT8_C(30), + UINT8_C(11), UINT8_C(3), UINT8_C(46), UINT8_C(16), UINT8_C(32), UINT8_C(30), UINT8_C(15), UINT8_C(19) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16x2_t r, src, expected; + src.val[0] = simde_vld1q_u8(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u8(test_vec[i].src[1]); + + SIMDE_CONSTIFY_16_(simde_vld2q_lane_u8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u8(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u8(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); + } + + return 
0; +} + +static int +test_simde_vld2q_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t src[2][8]; + uint16_t buf[2]; + uint16_t r[2][8]; + } test_vec[] = { + { { { UINT16_C(418), UINT16_C(216), UINT16_C(347), UINT16_C(476), + UINT16_C(360), UINT16_C(143), UINT16_C(316), UINT16_C(114) }, + { UINT16_C(387), UINT16_C(146), UINT16_C(225), UINT16_C(190), + UINT16_C(399), UINT16_C(128), UINT16_C(400), UINT16_C(257) } }, + { UINT16_C(488), UINT16_C(232)}, + { { UINT16_C(488), UINT16_C(216), UINT16_C(347), UINT16_C(476), + UINT16_C(360), UINT16_C(143), UINT16_C(316), UINT16_C(114) }, + { UINT16_C(232), UINT16_C(146), UINT16_C(225), UINT16_C(190), + UINT16_C(399), UINT16_C(128), UINT16_C(400), UINT16_C(257) } } }, + { { { UINT16_C(311), UINT16_C(430), UINT16_C(36), UINT16_C(399), + UINT16_C(84), UINT16_C(165), UINT16_C(434), UINT16_C(105) }, + { UINT16_C(449), UINT16_C(455), UINT16_C(390), UINT16_C(160), + UINT16_C(77), UINT16_C(147), UINT16_C(38), UINT16_C(271) } }, + { UINT16_C(106), UINT16_C(427)}, + { { UINT16_C(311), UINT16_C(106), UINT16_C(36), UINT16_C(399), + UINT16_C(84), UINT16_C(165), UINT16_C(434), UINT16_C(105) }, + { UINT16_C(449), UINT16_C(427), UINT16_C(390), UINT16_C(160), + UINT16_C(77), UINT16_C(147), UINT16_C(38), UINT16_C(271) } } }, + { { { UINT16_C(181), UINT16_C(136), UINT16_C(379), UINT16_C(296), + UINT16_C(309), UINT16_C(435), UINT16_C(152), UINT16_C(215) }, + { UINT16_C(379), UINT16_C(147), UINT16_C(345), UINT16_C(276), + UINT16_C(105), UINT16_C(142), UINT16_C(485), UINT16_C(383) } }, + { UINT16_C(39), UINT16_C(212)}, + { { UINT16_C(181), UINT16_C(136), UINT16_C(39), UINT16_C(296), + UINT16_C(309), UINT16_C(435), UINT16_C(152), UINT16_C(215) }, + { UINT16_C(379), UINT16_C(147), UINT16_C(212), UINT16_C(276), + UINT16_C(105), UINT16_C(142), UINT16_C(485), UINT16_C(383) } } }, + { { { UINT16_C(181), UINT16_C(451), UINT16_C(74), UINT16_C(26), + UINT16_C(435), UINT16_C(466), UINT16_C(458), UINT16_C(378) }, + { UINT16_C(292), 
UINT16_C(173), UINT16_C(470), UINT16_C(400), + UINT16_C(286), UINT16_C(269), UINT16_C(383), UINT16_C(317) } }, + { UINT16_C(240), UINT16_C(459)}, + { { UINT16_C(181), UINT16_C(451), UINT16_C(74), UINT16_C(240), + UINT16_C(435), UINT16_C(466), UINT16_C(458), UINT16_C(378) }, + { UINT16_C(292), UINT16_C(173), UINT16_C(470), UINT16_C(459), + UINT16_C(286), UINT16_C(269), UINT16_C(383), UINT16_C(317) } } }, + { { { UINT16_C(377), UINT16_C(162), UINT16_C(3), UINT16_C(316), + UINT16_C(150), UINT16_C(261), UINT16_C(40), UINT16_C(64) }, + { UINT16_C(243), UINT16_C(47), UINT16_C(381), UINT16_C(58), + UINT16_C(290), UINT16_C(64), UINT16_C(220), UINT16_C(89) } }, + { UINT16_C(378), UINT16_C(355)}, + { { UINT16_C(377), UINT16_C(162), UINT16_C(3), UINT16_C(316), + UINT16_C(378), UINT16_C(261), UINT16_C(40), UINT16_C(64) }, + { UINT16_C(243), UINT16_C(47), UINT16_C(381), UINT16_C(58), + UINT16_C(355), UINT16_C(64), UINT16_C(220), UINT16_C(89) } } }, + { { { UINT16_C(298), UINT16_C(486), UINT16_C(96), UINT16_C(132), + UINT16_C(108), UINT16_C(284), UINT16_C(54), UINT16_C(224) }, + { UINT16_C(249), UINT16_C(117), UINT16_C(26), UINT16_C(467), + UINT16_C(477), UINT16_C(25), UINT16_C(300), UINT16_C(233) } }, + { UINT16_C(454), UINT16_C(179)}, + { { UINT16_C(298), UINT16_C(486), UINT16_C(96), UINT16_C(132), + UINT16_C(108), UINT16_C(454), UINT16_C(54), UINT16_C(224) }, + { UINT16_C(249), UINT16_C(117), UINT16_C(26), UINT16_C(467), + UINT16_C(477), UINT16_C(179), UINT16_C(300), UINT16_C(233) } } }, + { { { UINT16_C(15), UINT16_C(88), UINT16_C(166), UINT16_C(374), + UINT16_C(203), UINT16_C(81), UINT16_C(316), UINT16_C(214) }, + { UINT16_C(227), UINT16_C(24), UINT16_C(14), UINT16_C(100), + UINT16_C(31), UINT16_C(86), UINT16_C(474), UINT16_C(470) } }, + { UINT16_C(325), UINT16_C(388)}, + { { UINT16_C(15), UINT16_C(88), UINT16_C(166), UINT16_C(374), + UINT16_C(203), UINT16_C(81), UINT16_C(325), UINT16_C(214) }, + { UINT16_C(227), UINT16_C(24), UINT16_C(14), UINT16_C(100), + UINT16_C(31), 
UINT16_C(86), UINT16_C(388), UINT16_C(470) } } }, + { { { UINT16_C(370), UINT16_C(25), UINT16_C(181), UINT16_C(342), + UINT16_C(6), UINT16_C(89), UINT16_C(30), UINT16_C(425) }, + { UINT16_C(478), UINT16_C(331), UINT16_C(463), UINT16_C(400), + UINT16_C(469), UINT16_C(40), UINT16_C(46), UINT16_C(134) } }, + { UINT16_C(225), UINT16_C(7)}, + { { UINT16_C(370), UINT16_C(25), UINT16_C(181), UINT16_C(342), + UINT16_C(6), UINT16_C(89), UINT16_C(30), UINT16_C(225) }, + { UINT16_C(478), UINT16_C(331), UINT16_C(463), UINT16_C(400), + UINT16_C(469), UINT16_C(40), UINT16_C(46), UINT16_C(7) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8x2_t r, src, expected; + src.val[0] = simde_vld1q_u16(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u16(test_vec[i].src[1]); + + SIMDE_CONSTIFY_8_(simde_vld2q_lane_u16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u16(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u16(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t src[2][4]; + uint32_t buf[2]; + uint32_t r[2][4]; + } test_vec[] = { + { { { UINT32_C(224), UINT32_C(24), UINT32_C(2014), UINT32_C(3335) }, + { UINT32_C(4141), UINT32_C(3456), UINT32_C(3311), UINT32_C(746) } }, + { UINT32_C(2149), UINT32_C(82)}, + { { UINT32_C(2149), UINT32_C(24), UINT32_C(2014), UINT32_C(3335) }, + { UINT32_C(82), UINT32_C(3456), UINT32_C(3311), UINT32_C(746) } } }, + { { { UINT32_C(2915), UINT32_C(4196), UINT32_C(1917), UINT32_C(77) }, + { UINT32_C(506), UINT32_C(4630), UINT32_C(894), UINT32_C(1062) } }, + { UINT32_C(121), UINT32_C(4389)}, + { { UINT32_C(2915), UINT32_C(121), UINT32_C(1917), UINT32_C(77) }, + { UINT32_C(506), UINT32_C(4389), UINT32_C(894), UINT32_C(1062) } } }, + 
{ { { UINT32_C(2980), UINT32_C(3337), UINT32_C(2109), UINT32_C(1551) }, + { UINT32_C(3126), UINT32_C(3966), UINT32_C(686), UINT32_C(3121) } }, + { UINT32_C(3921), UINT32_C(4929)}, + { { UINT32_C(2980), UINT32_C(3337), UINT32_C(3921), UINT32_C(1551) }, + { UINT32_C(3126), UINT32_C(3966), UINT32_C(4929), UINT32_C(3121) } } }, + { { { UINT32_C(4755), UINT32_C(2232), UINT32_C(2970), UINT32_C(272) }, + { UINT32_C(492), UINT32_C(4034), UINT32_C(412), UINT32_C(1495) } }, + { UINT32_C(4153), UINT32_C(2365)}, + { { UINT32_C(4755), UINT32_C(2232), UINT32_C(2970), UINT32_C(4153) }, + { UINT32_C(492), UINT32_C(4034), UINT32_C(412), UINT32_C(2365) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4x2_t r, src, expected; + src.val[0] = simde_vld1q_u32(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u32(test_vec[i].src[1]); + + SIMDE_CONSTIFY_4_(simde_vld2q_lane_u32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u32(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u32(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); + } + return 0; +} + +static int +test_simde_vld2q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t src[2][2]; + uint64_t buf[2]; + uint64_t r[2][2]; + } test_vec[] = { + { { { UINT64_C(133259958), UINT64_C(495309638) }, + { UINT64_C(463332514), UINT64_C(273508271) } }, + { UINT64_C(159685885), UINT64_C(187025881)}, + { { UINT64_C(159685885), UINT64_C(495309638) }, + { UINT64_C(187025881), UINT64_C(273508271) } } }, + { { { UINT64_C(430109367), UINT64_C(402602870) }, + { UINT64_C(309788205), UINT64_C(441710399) } }, + { UINT64_C(264334842), UINT64_C(60309831)}, + { { UINT64_C(430109367), UINT64_C(264334842) }, + { UINT64_C(309788205), UINT64_C(60309831) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) 
{ + simde_uint64x2x2_t r, src, expected; + src.val[0] = simde_vld1q_u64(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u64(test_vec[i].src[1]); + + SIMDE_CONSTIFY_2_(simde_vld2q_lane_u64, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u64(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u64(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); + } + + return 0; +} + +static int +test_simde_vld2q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[2][8]; + simde_float16_t buf[2]; + simde_float16_t r[2][8]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE(-44.86), SIMDE_FLOAT16_VALUE(-27.30), SIMDE_FLOAT16_VALUE(-35.67), SIMDE_FLOAT16_VALUE(22.98), + SIMDE_FLOAT16_VALUE(-34.31), SIMDE_FLOAT16_VALUE(-40.87), SIMDE_FLOAT16_VALUE(-37.54), SIMDE_FLOAT16_VALUE(-11.89) }, + { SIMDE_FLOAT16_VALUE(-19.39), SIMDE_FLOAT16_VALUE(-21.84), SIMDE_FLOAT16_VALUE(-32.89), SIMDE_FLOAT16_VALUE(43.11), + SIMDE_FLOAT16_VALUE(-10.75), SIMDE_FLOAT16_VALUE(-18.47), SIMDE_FLOAT16_VALUE(14.59), SIMDE_FLOAT16_VALUE(48.79) } }, + { SIMDE_FLOAT16_VALUE(7.62), SIMDE_FLOAT16_VALUE(-49.51)}, + { { SIMDE_FLOAT16_VALUE(7.62), SIMDE_FLOAT16_VALUE(-27.30), SIMDE_FLOAT16_VALUE(-35.67), SIMDE_FLOAT16_VALUE(22.98), + SIMDE_FLOAT16_VALUE(-34.31), SIMDE_FLOAT16_VALUE(-40.87), SIMDE_FLOAT16_VALUE(-37.54), SIMDE_FLOAT16_VALUE(-11.89) }, + { SIMDE_FLOAT16_VALUE(-49.51), SIMDE_FLOAT16_VALUE(-21.84), SIMDE_FLOAT16_VALUE(-32.89), SIMDE_FLOAT16_VALUE(43.11), + SIMDE_FLOAT16_VALUE(-10.75), SIMDE_FLOAT16_VALUE(-18.47), SIMDE_FLOAT16_VALUE(14.59), SIMDE_FLOAT16_VALUE(48.79) } } }, + { { { SIMDE_FLOAT16_VALUE(-38.62), SIMDE_FLOAT16_VALUE(4.41), SIMDE_FLOAT16_VALUE(-33.69), SIMDE_FLOAT16_VALUE(-29.42), + SIMDE_FLOAT16_VALUE(-26.20), SIMDE_FLOAT16_VALUE(-37.93), SIMDE_FLOAT16_VALUE(-22.76), SIMDE_FLOAT16_VALUE(9.22) }, + { SIMDE_FLOAT16_VALUE(33.15), 
SIMDE_FLOAT16_VALUE(36.15), SIMDE_FLOAT16_VALUE(8.88), SIMDE_FLOAT16_VALUE(-47.77), + SIMDE_FLOAT16_VALUE(42.44), SIMDE_FLOAT16_VALUE(-2.49), SIMDE_FLOAT16_VALUE(-30.82), SIMDE_FLOAT16_VALUE(-8.07) } }, + { SIMDE_FLOAT16_VALUE(13.95), SIMDE_FLOAT16_VALUE(-11.74)}, + { { SIMDE_FLOAT16_VALUE(-38.62), SIMDE_FLOAT16_VALUE(13.95), SIMDE_FLOAT16_VALUE(-33.69), SIMDE_FLOAT16_VALUE(-29.42), + SIMDE_FLOAT16_VALUE(-26.20), SIMDE_FLOAT16_VALUE(-37.93), SIMDE_FLOAT16_VALUE(-22.76), SIMDE_FLOAT16_VALUE(9.22) }, + { SIMDE_FLOAT16_VALUE(33.15), SIMDE_FLOAT16_VALUE(-11.74), SIMDE_FLOAT16_VALUE(8.88), SIMDE_FLOAT16_VALUE(-47.77), + SIMDE_FLOAT16_VALUE(42.44), SIMDE_FLOAT16_VALUE(-2.49), SIMDE_FLOAT16_VALUE(-30.82), SIMDE_FLOAT16_VALUE(-8.07) } } }, + { { { SIMDE_FLOAT16_VALUE(20.63), SIMDE_FLOAT16_VALUE(-26.03), SIMDE_FLOAT16_VALUE(-49.03), SIMDE_FLOAT16_VALUE(-9.02), + SIMDE_FLOAT16_VALUE(-7.24), SIMDE_FLOAT16_VALUE(21.25), SIMDE_FLOAT16_VALUE(7.83), SIMDE_FLOAT16_VALUE(13.60) }, + { SIMDE_FLOAT16_VALUE(-25.60), SIMDE_FLOAT16_VALUE(-0.34), SIMDE_FLOAT16_VALUE(-14.32), SIMDE_FLOAT16_VALUE(11.44), + SIMDE_FLOAT16_VALUE(-0.16), SIMDE_FLOAT16_VALUE(44.81), SIMDE_FLOAT16_VALUE(-26.95), SIMDE_FLOAT16_VALUE(-20.70) } }, + { SIMDE_FLOAT16_VALUE(-12.06), SIMDE_FLOAT16_VALUE(-20.03)}, + { { SIMDE_FLOAT16_VALUE(20.63), SIMDE_FLOAT16_VALUE(-26.03), SIMDE_FLOAT16_VALUE(-12.06), SIMDE_FLOAT16_VALUE(-9.02), + SIMDE_FLOAT16_VALUE(-7.24), SIMDE_FLOAT16_VALUE(21.25), SIMDE_FLOAT16_VALUE(7.83), SIMDE_FLOAT16_VALUE(13.60) }, + { SIMDE_FLOAT16_VALUE(-25.60), SIMDE_FLOAT16_VALUE(-0.34), SIMDE_FLOAT16_VALUE(-20.03), SIMDE_FLOAT16_VALUE(11.44), + SIMDE_FLOAT16_VALUE(-0.16), SIMDE_FLOAT16_VALUE(44.81), SIMDE_FLOAT16_VALUE(-26.95), SIMDE_FLOAT16_VALUE(-20.70) } } }, + { { { SIMDE_FLOAT16_VALUE(-11.60), SIMDE_FLOAT16_VALUE(-14.50), SIMDE_FLOAT16_VALUE(45.43), SIMDE_FLOAT16_VALUE(40.80), + SIMDE_FLOAT16_VALUE(-19.67), SIMDE_FLOAT16_VALUE(-0.54), SIMDE_FLOAT16_VALUE(31.88), SIMDE_FLOAT16_VALUE(14.74) }, + { 
SIMDE_FLOAT16_VALUE(-31.78), SIMDE_FLOAT16_VALUE(8.81), SIMDE_FLOAT16_VALUE(10.59), SIMDE_FLOAT16_VALUE(6.10), + SIMDE_FLOAT16_VALUE(-25.88), SIMDE_FLOAT16_VALUE(0.90), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(13.51) } }, + { SIMDE_FLOAT16_VALUE(-26.80), SIMDE_FLOAT16_VALUE(13.43)}, + { { SIMDE_FLOAT16_VALUE(-11.60), SIMDE_FLOAT16_VALUE(-14.50), SIMDE_FLOAT16_VALUE(45.43), SIMDE_FLOAT16_VALUE(-26.80), + SIMDE_FLOAT16_VALUE(-19.67), SIMDE_FLOAT16_VALUE(-0.54), SIMDE_FLOAT16_VALUE(31.88), SIMDE_FLOAT16_VALUE(14.74) }, + { SIMDE_FLOAT16_VALUE(-31.78), SIMDE_FLOAT16_VALUE(8.81), SIMDE_FLOAT16_VALUE(10.59), SIMDE_FLOAT16_VALUE(13.43), + SIMDE_FLOAT16_VALUE(-25.88), SIMDE_FLOAT16_VALUE(0.90), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(13.51) } } }, + { { { SIMDE_FLOAT16_VALUE(33.71), SIMDE_FLOAT16_VALUE(-5.48), SIMDE_FLOAT16_VALUE(-13.87), SIMDE_FLOAT16_VALUE(2.58), + SIMDE_FLOAT16_VALUE(22.05), SIMDE_FLOAT16_VALUE(-25.14), SIMDE_FLOAT16_VALUE(32.71), SIMDE_FLOAT16_VALUE(49.78) }, + { SIMDE_FLOAT16_VALUE(-33.81), SIMDE_FLOAT16_VALUE(-37.35), SIMDE_FLOAT16_VALUE(6.37), SIMDE_FLOAT16_VALUE(42.63), + SIMDE_FLOAT16_VALUE(27.98), SIMDE_FLOAT16_VALUE(-47.79), SIMDE_FLOAT16_VALUE(0.37), SIMDE_FLOAT16_VALUE(24.16) } }, + { SIMDE_FLOAT16_VALUE(-22.12), SIMDE_FLOAT16_VALUE(22.45)}, + { { SIMDE_FLOAT16_VALUE(33.71), SIMDE_FLOAT16_VALUE(-5.48), SIMDE_FLOAT16_VALUE(-13.87), SIMDE_FLOAT16_VALUE(2.58), + SIMDE_FLOAT16_VALUE(-22.12), SIMDE_FLOAT16_VALUE(-25.14), SIMDE_FLOAT16_VALUE(32.71), SIMDE_FLOAT16_VALUE(49.78) }, + { SIMDE_FLOAT16_VALUE(-33.81), SIMDE_FLOAT16_VALUE(-37.35), SIMDE_FLOAT16_VALUE(6.37), SIMDE_FLOAT16_VALUE(42.63), + SIMDE_FLOAT16_VALUE(22.45), SIMDE_FLOAT16_VALUE(-47.79), SIMDE_FLOAT16_VALUE(0.37), SIMDE_FLOAT16_VALUE(24.16) } } }, + { { { SIMDE_FLOAT16_VALUE(35.13), SIMDE_FLOAT16_VALUE(-36.13), SIMDE_FLOAT16_VALUE(-42.60), SIMDE_FLOAT16_VALUE(-23.46), + SIMDE_FLOAT16_VALUE(-42.93), SIMDE_FLOAT16_VALUE(-21.16), SIMDE_FLOAT16_VALUE(-29.86), 
SIMDE_FLOAT16_VALUE(29.04) }, + { SIMDE_FLOAT16_VALUE(37.40), SIMDE_FLOAT16_VALUE(30.74), SIMDE_FLOAT16_VALUE(30.82), SIMDE_FLOAT16_VALUE(44.38), + SIMDE_FLOAT16_VALUE(-30.52), SIMDE_FLOAT16_VALUE(42.55), SIMDE_FLOAT16_VALUE(16.70), SIMDE_FLOAT16_VALUE(47.97) } }, + { SIMDE_FLOAT16_VALUE(41.68), SIMDE_FLOAT16_VALUE(12.98)}, + { { SIMDE_FLOAT16_VALUE(35.13), SIMDE_FLOAT16_VALUE(-36.13), SIMDE_FLOAT16_VALUE(-42.60), SIMDE_FLOAT16_VALUE(-23.46), + SIMDE_FLOAT16_VALUE(-42.93), SIMDE_FLOAT16_VALUE(41.68), SIMDE_FLOAT16_VALUE(-29.86), SIMDE_FLOAT16_VALUE(29.04) }, + { SIMDE_FLOAT16_VALUE(37.40), SIMDE_FLOAT16_VALUE(30.74), SIMDE_FLOAT16_VALUE(30.82), SIMDE_FLOAT16_VALUE(44.38), + SIMDE_FLOAT16_VALUE(-30.52), SIMDE_FLOAT16_VALUE(12.98), SIMDE_FLOAT16_VALUE(16.70), SIMDE_FLOAT16_VALUE(47.97) } } }, + { { { SIMDE_FLOAT16_VALUE(45.22), SIMDE_FLOAT16_VALUE(-8.00), SIMDE_FLOAT16_VALUE(-27.03), SIMDE_FLOAT16_VALUE(48.60), + SIMDE_FLOAT16_VALUE(19.87), SIMDE_FLOAT16_VALUE(-1.72), SIMDE_FLOAT16_VALUE(-45.40), SIMDE_FLOAT16_VALUE(-3.56) }, + { SIMDE_FLOAT16_VALUE(36.60), SIMDE_FLOAT16_VALUE(-32.25), SIMDE_FLOAT16_VALUE(13.01), SIMDE_FLOAT16_VALUE(-12.64), + SIMDE_FLOAT16_VALUE(22.43), SIMDE_FLOAT16_VALUE(1.87), SIMDE_FLOAT16_VALUE(-38.57), SIMDE_FLOAT16_VALUE(-24.61) } }, + { SIMDE_FLOAT16_VALUE(49.75), SIMDE_FLOAT16_VALUE(-1.23)}, + { { SIMDE_FLOAT16_VALUE(45.22), SIMDE_FLOAT16_VALUE(-8.00), SIMDE_FLOAT16_VALUE(-27.03), SIMDE_FLOAT16_VALUE(48.60), + SIMDE_FLOAT16_VALUE(19.87), SIMDE_FLOAT16_VALUE(-1.72), SIMDE_FLOAT16_VALUE(49.75), SIMDE_FLOAT16_VALUE(-3.56) }, + { SIMDE_FLOAT16_VALUE(36.60), SIMDE_FLOAT16_VALUE(-32.25), SIMDE_FLOAT16_VALUE(13.01), SIMDE_FLOAT16_VALUE(-12.64), + SIMDE_FLOAT16_VALUE(22.43), SIMDE_FLOAT16_VALUE(1.87), SIMDE_FLOAT16_VALUE(-1.23), SIMDE_FLOAT16_VALUE(-24.61) } } }, + { { { SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(-43.53), SIMDE_FLOAT16_VALUE(20.69), SIMDE_FLOAT16_VALUE(26.28), + SIMDE_FLOAT16_VALUE(44.06), SIMDE_FLOAT16_VALUE(42.57), 
SIMDE_FLOAT16_VALUE(7.84), SIMDE_FLOAT16_VALUE(-4.34) }, + { SIMDE_FLOAT16_VALUE(-7.29), SIMDE_FLOAT16_VALUE(5.56), SIMDE_FLOAT16_VALUE(-20.99), SIMDE_FLOAT16_VALUE(-46.49), + SIMDE_FLOAT16_VALUE(24.87), SIMDE_FLOAT16_VALUE(40.56), SIMDE_FLOAT16_VALUE(-20.02), SIMDE_FLOAT16_VALUE(38.66) } }, + { SIMDE_FLOAT16_VALUE(17.79), SIMDE_FLOAT16_VALUE(37.56)}, + { { SIMDE_FLOAT16_VALUE(24.31), SIMDE_FLOAT16_VALUE(-43.53), SIMDE_FLOAT16_VALUE(20.69), SIMDE_FLOAT16_VALUE(26.28), + SIMDE_FLOAT16_VALUE(44.06), SIMDE_FLOAT16_VALUE(42.57), SIMDE_FLOAT16_VALUE(7.84), SIMDE_FLOAT16_VALUE(17.79) }, + { SIMDE_FLOAT16_VALUE(-7.29), SIMDE_FLOAT16_VALUE(5.56), SIMDE_FLOAT16_VALUE(-20.99), SIMDE_FLOAT16_VALUE(-46.49), + SIMDE_FLOAT16_VALUE(24.87), SIMDE_FLOAT16_VALUE(40.56), SIMDE_FLOAT16_VALUE(-20.02), SIMDE_FLOAT16_VALUE(37.56) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x2_t r, src, expected; + src.val[0] = simde_vld1q_f16(test_vec[i].src[0]); + src.val[1] = simde_vld1q_f16(test_vec[i].src[1]); + + SIMDE_CONSTIFY_8_(simde_vld2q_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_f16(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_f16(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld2q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t src[2][4]; + simde_float32_t buf[2]; + simde_float32_t r[2][4]; + } test_vec[] = { + { { { SIMDE_FLOAT32_C(2894.40), SIMDE_FLOAT32_C(2128.37), SIMDE_FLOAT32_C(-2323.09), SIMDE_FLOAT32_C(1580.08) }, + { SIMDE_FLOAT32_C(2459.79), SIMDE_FLOAT32_C(-3713.19), SIMDE_FLOAT32_C(3258.52), SIMDE_FLOAT32_C(-1845.95) } }, + { SIMDE_FLOAT32_C(806.30), SIMDE_FLOAT32_C(3875.55)}, + { { SIMDE_FLOAT32_C(806.30), SIMDE_FLOAT32_C(2128.37), 
SIMDE_FLOAT32_C(-2323.09), SIMDE_FLOAT32_C(1580.08) }, + { SIMDE_FLOAT32_C(3875.55), SIMDE_FLOAT32_C(-3713.19), SIMDE_FLOAT32_C(3258.52), SIMDE_FLOAT32_C(-1845.95) } } }, + { { { SIMDE_FLOAT32_C(-827.29), SIMDE_FLOAT32_C(-188.20), SIMDE_FLOAT32_C(3036.33), SIMDE_FLOAT32_C(-2521.65) }, + { SIMDE_FLOAT32_C(1688.02), SIMDE_FLOAT32_C(-4475.05), SIMDE_FLOAT32_C(-1748.43), SIMDE_FLOAT32_C(-4757.62) } }, + { SIMDE_FLOAT32_C(3632.65), SIMDE_FLOAT32_C(4364.49)}, + { { SIMDE_FLOAT32_C(-827.29), SIMDE_FLOAT32_C(3632.65), SIMDE_FLOAT32_C(3036.33), SIMDE_FLOAT32_C(-2521.65) }, + { SIMDE_FLOAT32_C(1688.02), SIMDE_FLOAT32_C(4364.49), SIMDE_FLOAT32_C(-1748.43), SIMDE_FLOAT32_C(-4757.62) } } }, + { { { SIMDE_FLOAT32_C(-1210.04), SIMDE_FLOAT32_C(-1519.92), SIMDE_FLOAT32_C(-362.73), SIMDE_FLOAT32_C(-1943.77) }, + { SIMDE_FLOAT32_C(-3218.48), SIMDE_FLOAT32_C(3093.08), SIMDE_FLOAT32_C(1976.08), SIMDE_FLOAT32_C(3235.56) } }, + { SIMDE_FLOAT32_C(91.16), SIMDE_FLOAT32_C(-1366.95)}, + { { SIMDE_FLOAT32_C(-1210.04), SIMDE_FLOAT32_C(-1519.92), SIMDE_FLOAT32_C(91.16), SIMDE_FLOAT32_C(-1943.77) }, + { SIMDE_FLOAT32_C(-3218.48), SIMDE_FLOAT32_C(3093.08), SIMDE_FLOAT32_C(-1366.95), SIMDE_FLOAT32_C(3235.56) } } }, + { { { SIMDE_FLOAT32_C(1155.21), SIMDE_FLOAT32_C(2979.92), SIMDE_FLOAT32_C(3342.26), SIMDE_FLOAT32_C(2850.18) }, + { SIMDE_FLOAT32_C(-1185.46), SIMDE_FLOAT32_C(1494.90), SIMDE_FLOAT32_C(3871.56), SIMDE_FLOAT32_C(4240.34) } }, + { SIMDE_FLOAT32_C(4082.13), SIMDE_FLOAT32_C(479.75)}, + { { SIMDE_FLOAT32_C(1155.21), SIMDE_FLOAT32_C(2979.92), SIMDE_FLOAT32_C(3342.26), SIMDE_FLOAT32_C(4082.13) }, + { SIMDE_FLOAT32_C(-1185.46), SIMDE_FLOAT32_C(1494.90), SIMDE_FLOAT32_C(3871.56), SIMDE_FLOAT32_C(479.75) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4x2_t r, src, expected; + src.val[0] = simde_vld1q_f32(test_vec[i].src[0]); + src.val[1] = simde_vld1q_f32(test_vec[i].src[1]); + + SIMDE_CONSTIFY_4_(simde_vld2q_lane_f32, r, 
(HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_f32(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_f32(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); + } + + return 0; + +} + +static int +test_simde_vld2q_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { /* Checks simde_vld2q_lane_f64: in test vector i, buf[0] and buf[1] replace lane i of src[0] and src[1]; all other lanes must come back unchanged. */ + static const struct { + simde_float64_t src[2][2]; /* initial contents of the two 2-lane vectors */ + simde_float64_t buf[2]; /* the two values loaded from memory */ + simde_float64_t r[2][2]; /* expected vectors after the lane load */ + } test_vec[] = { + { { { SIMDE_FLOAT64_C(-441230.65), SIMDE_FLOAT64_C(460847.11) }, + { SIMDE_FLOAT64_C(178930.01), SIMDE_FLOAT64_C(325559.94) } }, + { SIMDE_FLOAT64_C(244879.30), SIMDE_FLOAT64_C(-169707.98)}, + { { SIMDE_FLOAT64_C(244879.30), SIMDE_FLOAT64_C(460847.11) }, + { SIMDE_FLOAT64_C(-169707.98), SIMDE_FLOAT64_C(325559.94) } } }, + { { { SIMDE_FLOAT64_C(-157130.88), SIMDE_FLOAT64_C(-243630.90) }, + { SIMDE_FLOAT64_C(173252.82), SIMDE_FLOAT64_C(96002.40) } }, + { SIMDE_FLOAT64_C(429185.16), SIMDE_FLOAT64_C(-474490.37)}, + { { SIMDE_FLOAT64_C(-157130.88), SIMDE_FLOAT64_C(429185.16) }, + { SIMDE_FLOAT64_C(173252.82), SIMDE_FLOAT64_C(-474490.37) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2x2_t r, src, expected; + src.val[0] = simde_vld1q_f64(test_vec[i].src[0]); + src.val[1] = simde_vld1q_f64(test_vec[i].src[1]); + + SIMDE_CONSTIFY_2_(simde_vld2q_lane_f64, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); /* the loop index doubles as the lane number, so the table needs exactly one entry per lane; NOTE(review): presumably the macro dispatches i to a constant lane argument - confirm in the SIMDe headers */ + + expected.val[0] = simde_vld1q_f64(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_f64(test_vec[i].r[1]); + + simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], INT_MAX); + } + + return 0; + +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_s32) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2_lane_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_lane_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld3.c b/test/arm/neon/ld3.c new file mode 100644 index 000000000..d6287174a --- /dev/null +++ b/test/arm/neon/ld3.c @@ -0,0 +1,1678 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld3 + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld3.h" + +#if !defined(SIMDE_BUG_INTEL_857088) + +static int +test_simde_vld3_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[24]; + int8_t r[3][8]; + } test_vec[] = { + { { + -INT8_C( 125), + INT8_C( 63), + -INT8_C( 63), + -INT8_C( 82), + -INT8_C( 12), + -INT8_C( 118), + -INT8_C( 51), + INT8_C( 81), + INT8_C( 94), + -INT8_C( 114), + INT8_C( 72), + INT8_C( 42), + INT8_C( 105), + -INT8_C( 10), + -INT8_C( 88), + INT8_C( 0), + INT8_C( 76), + -INT8_C( 57), + -INT8_C( 73), + INT8_C( 20), + -INT8_C( 6), + INT8_C( 100), + -INT8_C( 89), + INT8_C( 16), + }, + { { -INT8_C( 125), -INT8_C( 82), -INT8_C( 51), -INT8_C( 114), INT8_C( 105), INT8_C( 0), -INT8_C( 73), INT8_C( 100) }, + { INT8_C( 63), -INT8_C( 12), INT8_C( 81), INT8_C( 72), -INT8_C( 10), INT8_C( 76), INT8_C( 20), -INT8_C( 89) }, + { -INT8_C( 63), -INT8_C( 118), INT8_C( 94), 
INT8_C( 42), -INT8_C( 88), -INT8_C( 57), -INT8_C( 6), INT8_C( 16) }, + }, + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8x3_t r = simde_vld3_s8(test_vec[i].a); + + simde_int8x8x3_t expected = { + {simde_vld1_s8(test_vec[i].r[0]), simde_vld1_s8(test_vec[i].r[1]), simde_vld1_s8(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[12]; + simde_float16_t r[3][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(37.48), SIMDE_FLOAT16_VALUE(43.92), SIMDE_FLOAT16_VALUE(23.99), SIMDE_FLOAT16_VALUE(-35.43), SIMDE_FLOAT16_VALUE(5.05), SIMDE_FLOAT16_VALUE(23.98), + SIMDE_FLOAT16_VALUE(12.03), SIMDE_FLOAT16_VALUE(5.29), SIMDE_FLOAT16_VALUE(-23.02), SIMDE_FLOAT16_VALUE(-4.98), SIMDE_FLOAT16_VALUE(26.21), SIMDE_FLOAT16_VALUE(18.74) }, + { { SIMDE_FLOAT16_VALUE(37.48), SIMDE_FLOAT16_VALUE(-35.43), SIMDE_FLOAT16_VALUE(12.03), SIMDE_FLOAT16_VALUE(-4.98) }, + { SIMDE_FLOAT16_VALUE(43.92), SIMDE_FLOAT16_VALUE(5.05), SIMDE_FLOAT16_VALUE(5.29), SIMDE_FLOAT16_VALUE(26.21) }, + { SIMDE_FLOAT16_VALUE(23.99), SIMDE_FLOAT16_VALUE(23.98), SIMDE_FLOAT16_VALUE(-23.02), SIMDE_FLOAT16_VALUE(18.74) } } }, + { { SIMDE_FLOAT16_VALUE(42.18), SIMDE_FLOAT16_VALUE(31.30), SIMDE_FLOAT16_VALUE(25.00), SIMDE_FLOAT16_VALUE(-28.75), SIMDE_FLOAT16_VALUE(25.17), SIMDE_FLOAT16_VALUE(18.49), + SIMDE_FLOAT16_VALUE(-18.33), SIMDE_FLOAT16_VALUE(5.37), SIMDE_FLOAT16_VALUE(24.27), SIMDE_FLOAT16_VALUE(-41.97), SIMDE_FLOAT16_VALUE(9.83), SIMDE_FLOAT16_VALUE(-0.27) }, + { { SIMDE_FLOAT16_VALUE(42.18), SIMDE_FLOAT16_VALUE(-28.75), SIMDE_FLOAT16_VALUE(-18.33), SIMDE_FLOAT16_VALUE(-41.97) }, + { SIMDE_FLOAT16_VALUE(31.30), SIMDE_FLOAT16_VALUE(25.17), SIMDE_FLOAT16_VALUE(5.37), 
SIMDE_FLOAT16_VALUE(9.83) }, + { SIMDE_FLOAT16_VALUE(25.00), SIMDE_FLOAT16_VALUE(18.49), SIMDE_FLOAT16_VALUE(24.27), SIMDE_FLOAT16_VALUE(-0.27) } } }, + { { SIMDE_FLOAT16_VALUE(-21.37), SIMDE_FLOAT16_VALUE(9.46), SIMDE_FLOAT16_VALUE(-7.71), SIMDE_FLOAT16_VALUE(30.30), SIMDE_FLOAT16_VALUE(-10.69), SIMDE_FLOAT16_VALUE(3.64), + SIMDE_FLOAT16_VALUE(-43.24), SIMDE_FLOAT16_VALUE(13.34), SIMDE_FLOAT16_VALUE(31.94), SIMDE_FLOAT16_VALUE(-23.61), SIMDE_FLOAT16_VALUE(40.34), SIMDE_FLOAT16_VALUE(37.79) }, + { { SIMDE_FLOAT16_VALUE(-21.37), SIMDE_FLOAT16_VALUE(30.30), SIMDE_FLOAT16_VALUE(-43.24), SIMDE_FLOAT16_VALUE(-23.61) }, + { SIMDE_FLOAT16_VALUE(9.46), SIMDE_FLOAT16_VALUE(-10.69), SIMDE_FLOAT16_VALUE(13.34), SIMDE_FLOAT16_VALUE(40.34) }, + { SIMDE_FLOAT16_VALUE(-7.71), SIMDE_FLOAT16_VALUE(3.64), SIMDE_FLOAT16_VALUE(31.94), SIMDE_FLOAT16_VALUE(37.79) } } }, + { { SIMDE_FLOAT16_VALUE(3.11), SIMDE_FLOAT16_VALUE(22.56), SIMDE_FLOAT16_VALUE(15.35), SIMDE_FLOAT16_VALUE(27.37), SIMDE_FLOAT16_VALUE(22.92), SIMDE_FLOAT16_VALUE(-29.85), + SIMDE_FLOAT16_VALUE(-16.70), SIMDE_FLOAT16_VALUE(47.88), SIMDE_FLOAT16_VALUE(-39.84), SIMDE_FLOAT16_VALUE(23.71), SIMDE_FLOAT16_VALUE(28.60), SIMDE_FLOAT16_VALUE(10.29) }, + { { SIMDE_FLOAT16_VALUE(3.11), SIMDE_FLOAT16_VALUE(27.37), SIMDE_FLOAT16_VALUE(-16.70), SIMDE_FLOAT16_VALUE(23.71) }, + { SIMDE_FLOAT16_VALUE(22.56), SIMDE_FLOAT16_VALUE(22.92), SIMDE_FLOAT16_VALUE(47.88), SIMDE_FLOAT16_VALUE(28.60) }, + { SIMDE_FLOAT16_VALUE(15.35), SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-39.84), SIMDE_FLOAT16_VALUE(10.29) } } }, + { { SIMDE_FLOAT16_VALUE(17.73), SIMDE_FLOAT16_VALUE(33.62), SIMDE_FLOAT16_VALUE(30.49), SIMDE_FLOAT16_VALUE(-45.52), SIMDE_FLOAT16_VALUE(-32.12), SIMDE_FLOAT16_VALUE(21.94), + SIMDE_FLOAT16_VALUE(-24.75), SIMDE_FLOAT16_VALUE(-16.54), SIMDE_FLOAT16_VALUE(23.29), SIMDE_FLOAT16_VALUE(40.42), SIMDE_FLOAT16_VALUE(10.94), SIMDE_FLOAT16_VALUE(16.67) }, + { { SIMDE_FLOAT16_VALUE(17.73), SIMDE_FLOAT16_VALUE(-45.52), 
SIMDE_FLOAT16_VALUE(-24.75), SIMDE_FLOAT16_VALUE(40.42) }, + { SIMDE_FLOAT16_VALUE(33.62), SIMDE_FLOAT16_VALUE(-32.12), SIMDE_FLOAT16_VALUE(-16.54), SIMDE_FLOAT16_VALUE(10.94) }, + { SIMDE_FLOAT16_VALUE(30.49), SIMDE_FLOAT16_VALUE(21.94), SIMDE_FLOAT16_VALUE(23.29), SIMDE_FLOAT16_VALUE(16.67) } } }, + { { SIMDE_FLOAT16_VALUE(-36.37), SIMDE_FLOAT16_VALUE(19.72), SIMDE_FLOAT16_VALUE(34.16), SIMDE_FLOAT16_VALUE(12.31), SIMDE_FLOAT16_VALUE(-32.49), SIMDE_FLOAT16_VALUE(18.71), + SIMDE_FLOAT16_VALUE(32.02), SIMDE_FLOAT16_VALUE(-3.55), SIMDE_FLOAT16_VALUE(37.82), SIMDE_FLOAT16_VALUE(20.73), SIMDE_FLOAT16_VALUE(31.56), SIMDE_FLOAT16_VALUE(-14.40) }, + { { SIMDE_FLOAT16_VALUE(-36.37), SIMDE_FLOAT16_VALUE(12.31), SIMDE_FLOAT16_VALUE(32.02), SIMDE_FLOAT16_VALUE(20.73) }, + { SIMDE_FLOAT16_VALUE(19.72), SIMDE_FLOAT16_VALUE(-32.49), SIMDE_FLOAT16_VALUE(-3.55), SIMDE_FLOAT16_VALUE(31.56) }, + { SIMDE_FLOAT16_VALUE(34.16), SIMDE_FLOAT16_VALUE(18.71), SIMDE_FLOAT16_VALUE(37.82), SIMDE_FLOAT16_VALUE(-14.40) } } }, + { { SIMDE_FLOAT16_VALUE(-13.66), SIMDE_FLOAT16_VALUE(32.06), SIMDE_FLOAT16_VALUE(44.67), SIMDE_FLOAT16_VALUE(-12.02), SIMDE_FLOAT16_VALUE(23.43), SIMDE_FLOAT16_VALUE(-9.95), + SIMDE_FLOAT16_VALUE(26.57), SIMDE_FLOAT16_VALUE(-36.45), SIMDE_FLOAT16_VALUE(8.78), SIMDE_FLOAT16_VALUE(-10.73), SIMDE_FLOAT16_VALUE(-18.07), SIMDE_FLOAT16_VALUE(-1.21) }, + { { SIMDE_FLOAT16_VALUE(-13.66), SIMDE_FLOAT16_VALUE(-12.02), SIMDE_FLOAT16_VALUE(26.57), SIMDE_FLOAT16_VALUE(-10.73) }, + { SIMDE_FLOAT16_VALUE(32.06), SIMDE_FLOAT16_VALUE(23.43), SIMDE_FLOAT16_VALUE(-36.45), SIMDE_FLOAT16_VALUE(-18.07) }, + { SIMDE_FLOAT16_VALUE(44.67), SIMDE_FLOAT16_VALUE(-9.95), SIMDE_FLOAT16_VALUE(8.78), SIMDE_FLOAT16_VALUE(-1.21) } } }, + { { SIMDE_FLOAT16_VALUE(8.83), SIMDE_FLOAT16_VALUE(37.66), SIMDE_FLOAT16_VALUE(-2.70), SIMDE_FLOAT16_VALUE(-20.01), SIMDE_FLOAT16_VALUE(-39.96), SIMDE_FLOAT16_VALUE(45.71), + SIMDE_FLOAT16_VALUE(-33.07), SIMDE_FLOAT16_VALUE(-21.02), SIMDE_FLOAT16_VALUE(39.55), 
SIMDE_FLOAT16_VALUE(26.93), SIMDE_FLOAT16_VALUE(25.35), SIMDE_FLOAT16_VALUE(-5.48) }, + { { SIMDE_FLOAT16_VALUE(8.83), SIMDE_FLOAT16_VALUE(-20.01), SIMDE_FLOAT16_VALUE(-33.07), SIMDE_FLOAT16_VALUE(26.93) }, + { SIMDE_FLOAT16_VALUE(37.66), SIMDE_FLOAT16_VALUE(-39.96), SIMDE_FLOAT16_VALUE(-21.02), SIMDE_FLOAT16_VALUE(25.35) }, + { SIMDE_FLOAT16_VALUE(-2.70), SIMDE_FLOAT16_VALUE(45.71), SIMDE_FLOAT16_VALUE(39.55), SIMDE_FLOAT16_VALUE(-5.48) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { /* a[] holds 12 interleaved values; the expected table satisfies r[k][j] == a[3*j + k] */ + simde_float16x4x3_t r = simde_vld3_f16(test_vec[i].a); + simde_float16x4x3_t expected = { + {simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1]), simde_vld1_f16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], INT_MAX); /* the result has three vectors; the third one used to go unchecked */ + } + + return 0; +} +/* +static int +test_simde_vld2_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t a[8]; + int16_t r[2][4]; + } test_vec[] = { + { { INT16_C( 26434), -INT16_C( 7742), -INT16_C( 24667), INT16_C( 2612), + -INT16_C( 16418), -INT16_C( 32141), -INT16_C( 30519), INT16_C( 21039) }, + { { INT16_C( 26434), -INT16_C( 24667), -INT16_C( 16418), -INT16_C( 30519) }, + { -INT16_C( 7742), INT16_C( 2612), -INT16_C( 32141), INT16_C( 21039) } }, + }, + { { INT16_C( 12584), INT16_C( 13680), INT16_C( 9409), INT16_C( 17421), + INT16_C( 27590), -INT16_C( 17399), -INT16_C( 23807), INT16_C( 17632) }, + { { INT16_C( 12584), INT16_C( 9409), INT16_C( 27590), -INT16_C( 23807) }, + { INT16_C( 13680), INT16_C( 17421), -INT16_C( 17399), INT16_C( 17632) } }, + }, + { { -INT16_C( 24054), -INT16_C( 20443), INT16_C( 22849), INT16_C( 8122), + INT16_C( 11544), -INT16_C( 7519), -INT16_C( 11851), -INT16_C( 8652) }, + { { -INT16_C( 24054), INT16_C( 22849), INT16_C( 11544), -INT16_C( 11851) }, + { -INT16_C( 20443), INT16_C( 8122), -INT16_C( 7519), -INT16_C( 8652) } }, + }, + { { -INT16_C( 
23294), -INT16_C( 15597), INT16_C( 8649), -INT16_C( 28921), + INT16_C( 4236), -INT16_C( 29365), INT16_C( 11188), -INT16_C( 16687) }, + { { -INT16_C( 23294), INT16_C( 8649), INT16_C( 4236), INT16_C( 11188) }, + { -INT16_C( 15597), -INT16_C( 28921), -INT16_C( 29365), -INT16_C( 16687) } } + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4x2_t r = simde_vld2_s16(test_vec[i].a); + + simde_int16x4x2_t expected = { + {simde_vld1_s16(test_vec[i].r[0]), simde_vld1_s16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 4 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x4x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int16_t buf[8]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_int16x4x2_t r = simde_vld2_s16(buf); + + simde_test_arm_neon_write_i16x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a[4]; + int32_t r[2][2]; + } test_vec[] = { + { { INT32_C( 886724837), INT32_C( 903508407), + -INT32_C( 977712366), INT32_C( 1183767792) }, + { { INT32_C( 886724837), -INT32_C( 977712366) }, + { INT32_C( 903508407), INT32_C( 1183767792) } }, + }, + { { INT32_C( 1474850969), INT32_C( 975881925), + INT32_C( 1191817794), -INT32_C( 1270968626) }, + { { INT32_C( 1474850969), INT32_C( 1191817794) }, + { INT32_C( 975881925), -INT32_C( 1270968626) } }, + }, + { { -INT32_C( 1628956186), -INT32_C( 1663843702), + -INT32_C( 43938803), INT32_C( 21229672) }, + { { -INT32_C( 1628956186), -INT32_C( 43938803) }, + { -INT32_C( 1663843702), INT32_C( 21229672) } }, + }, + { { 
INT32_C( 643312736), INT32_C( 878740466), + INT32_C( 58419765), -INT32_C( 558384392) }, + { { INT32_C( 643312736), INT32_C( 58419765) }, + { INT32_C( 878740466), -INT32_C( 558384392) } }, + }, + { { INT32_C( 1568448467), INT32_C( 1861832801), + INT32_C( 1164729308), -INT32_C( 1421430965) }, + { { INT32_C( 1568448467), INT32_C( 1164729308) }, + { INT32_C( 1861832801), -INT32_C( 1421430965) } }, + }, + { { -INT32_C( 841900069), INT32_C( 1459761698), + -INT32_C( 1806008932), INT32_C( 175247927) }, + { { -INT32_C( 841900069), -INT32_C( 1806008932) }, + { INT32_C( 1459761698), INT32_C( 175247927) } }, + }, + { { INT32_C( 325578673), INT32_C( 461463871), + INT32_C( 123792828), INT32_C( 2024974237) }, + { { INT32_C( 325578673), INT32_C( 123792828) }, + { INT32_C( 461463871), INT32_C( 2024974237) } }, + }, + { { INT32_C( 1749451846), INT32_C( 1388267702), + -INT32_C( 35251771), -INT32_C( 586721237) }, + { { INT32_C( 1749451846), -INT32_C( 35251771) }, + { INT32_C( 1388267702), -INT32_C( 586721237) } }} + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2x2_t r = simde_vld2_s32(test_vec[i].a); + + simde_int32x2x2_t expected = { + {simde_vld1_s32(test_vec[i].r[0]), simde_vld1_s32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int32_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_int32x2x2_t r = simde_vld2_s32(buf); + + simde_test_arm_neon_write_i32x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_s64 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int64_t a[2]; + int64_t r[2][1]; + } test_vec[] = { + { { INT64_C( 2491657136620446655), + -INT64_C( 8008712209217472471) }, + { { INT64_C( 2491657136620446655) }, + { -INT64_C( 8008712209217472471) } } + }, + { { INT64_C( 5905840427281538397), + -INT64_C( 256047405469913514) }, + { { INT64_C( 5905840427281538397) }, + { -INT64_C( 256047405469913514) } }, + }, + { { INT64_C( 1410598559050352250), + -INT64_C( 8581208681535646293) }, + { { INT64_C( 1410598559050352250) }, + { -INT64_C( 8581208681535646293) } }, + }, + { { INT64_C( 4768888611226069577), + INT64_C( 5613591610635419053) }, + { { INT64_C( 4768888611226069577) }, + { INT64_C( 5613591610635419053) } }, + }, + { { -INT64_C( 1999508928546814398), + -INT64_C( 8729622882906418906) }, + { { -INT64_C( 1999508928546814398) }, + { -INT64_C( 8729622882906418906) } }, + }, + { { -INT64_C( 8178376526721227951), + INT64_C( 5129424078989003022) }, + { { -INT64_C( 8178376526721227951) }, + { INT64_C( 5129424078989003022) } }, + }, + { { INT64_C( 7224815307703184678), + -INT64_C( 556316978773065340) }, + { { INT64_C( 7224815307703184678) }, + { -INT64_C( 556316978773065340) } }, + }, + { { INT64_C( 5360328353875667699), + INT64_C( 7100870651391302719) }, + { { INT64_C( 5360328353875667699) }, + { INT64_C( 7100870651391302719) } }} + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1x2_t r = simde_vld2_s64(test_vec[i].a); + + simde_int64x1x2_t expected = { + {simde_vld1_s64(test_vec[i].r[0]), simde_vld1_s64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); + simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); + simde_int64x1x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i64x1(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int64_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_int64x1x2_t r = simde_vld2_s64(buf); + + simde_test_arm_neon_write_i64x1x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t a[16]; + uint8_t r[2][8]; + } test_vec[] = { + + { { UINT8_C(144), UINT8_C(234), UINT8_C(130), UINT8_C(145), UINT8_C( 26), UINT8_C(241), UINT8_C( 35), UINT8_C( 43), + UINT8_C( 76), UINT8_C(223), UINT8_C(152), UINT8_C(128), UINT8_C(203), UINT8_C( 66), UINT8_C( 17), UINT8_C(218) }, + { { UINT8_C(144), UINT8_C(130), UINT8_C( 26), UINT8_C( 35), UINT8_C( 76), UINT8_C(152), UINT8_C(203), UINT8_C( 17) }, + { UINT8_C(234), UINT8_C(145), UINT8_C(241), UINT8_C( 43), UINT8_C(223), UINT8_C(128), UINT8_C( 66), UINT8_C(218) } }, + }, + { { UINT8_C(196), UINT8_C( 14), UINT8_C( 36), UINT8_C( 59), UINT8_C(230), UINT8_C(253), UINT8_C(216), UINT8_C( 14), + UINT8_C( 31), UINT8_C( 73), UINT8_C( 48), UINT8_C( 55), UINT8_C(200), UINT8_C( 71), UINT8_C(176), UINT8_C( 88) }, + { { UINT8_C(196), UINT8_C( 36), UINT8_C(230), UINT8_C(216), UINT8_C( 31), UINT8_C( 48), UINT8_C(200), UINT8_C(176) }, + { UINT8_C( 14), UINT8_C( 59), UINT8_C(253), UINT8_C( 14), UINT8_C( 73), UINT8_C( 55), UINT8_C( 71), UINT8_C( 88) } }, + }, + { { UINT8_C( 49), UINT8_C( 50), UINT8_C(233), UINT8_C( 76), UINT8_C( 35), UINT8_C( 13), UINT8_C(119), UINT8_C(111), + UINT8_C(236), UINT8_C( 15), UINT8_C(240), UINT8_C(184), UINT8_C( 81), UINT8_C( 1), UINT8_C(146), UINT8_C( 22) }, + { { UINT8_C( 49), UINT8_C(233), UINT8_C( 35), UINT8_C(119), UINT8_C(236), UINT8_C(240), UINT8_C( 81), UINT8_C(146) }, + { UINT8_C( 50), UINT8_C( 76), UINT8_C( 13), UINT8_C(111), UINT8_C( 15), UINT8_C(184), UINT8_C( 1), UINT8_C( 22) } }, + }, + { { UINT8_C( 15), UINT8_C(182), UINT8_C( 81), UINT8_C(245), UINT8_C(179), UINT8_C( 41), UINT8_C( 4), UINT8_C(211), + 
UINT8_C(115), UINT8_C( 52), UINT8_C( 10), UINT8_C( 59), UINT8_C(123), UINT8_C(187), UINT8_C(147), UINT8_C(173) }, + { { UINT8_C( 15), UINT8_C( 81), UINT8_C(179), UINT8_C( 4), UINT8_C(115), UINT8_C( 10), UINT8_C(123), UINT8_C(147) }, + { UINT8_C(182), UINT8_C(245), UINT8_C( 41), UINT8_C(211), UINT8_C( 52), UINT8_C( 59), UINT8_C(187), UINT8_C(173) } }, + }, + { { UINT8_C(237), UINT8_C(125), UINT8_C(249), UINT8_C( 17), UINT8_C(138), UINT8_C(112), UINT8_C(128), UINT8_C(118), + UINT8_C(127), UINT8_C(112), UINT8_C( 46), UINT8_C(208), UINT8_C(113), UINT8_C(193), UINT8_C(230), UINT8_C(129) }, + { { UINT8_C(237), UINT8_C(249), UINT8_C(138), UINT8_C(128), UINT8_C(127), UINT8_C( 46), UINT8_C(113), UINT8_C(230) }, + { UINT8_C(125), UINT8_C( 17), UINT8_C(112), UINT8_C(118), UINT8_C(112), UINT8_C(208), UINT8_C(193), UINT8_C(129) } }, + }, + { { UINT8_C(119), UINT8_C( 56), UINT8_C(118), UINT8_C( 43), UINT8_C( 97), UINT8_C(122), UINT8_C(254), UINT8_C(212), + UINT8_C(175), UINT8_C( 8), UINT8_C( 15), UINT8_C( 42), UINT8_C(195), UINT8_C(163), UINT8_C(215), UINT8_C(177) }, + { { UINT8_C(119), UINT8_C(118), UINT8_C( 97), UINT8_C(254), UINT8_C(175), UINT8_C( 15), UINT8_C(195), UINT8_C(215) }, + { UINT8_C( 56), UINT8_C( 43), UINT8_C(122), UINT8_C(212), UINT8_C( 8), UINT8_C( 42), UINT8_C(163), UINT8_C(177) } }, + }, + { { UINT8_C( 32), UINT8_C(208), UINT8_C(194), UINT8_C(170), UINT8_C( 64), UINT8_C( 66), UINT8_C( 32), UINT8_C(191), + UINT8_C(179), UINT8_C( 79), UINT8_C(144), UINT8_C( 36), UINT8_C( 16), UINT8_C(118), UINT8_C(165), UINT8_C(135) }, + { { UINT8_C( 32), UINT8_C(194), UINT8_C( 64), UINT8_C( 32), UINT8_C(179), UINT8_C(144), UINT8_C( 16), UINT8_C(165) }, + { UINT8_C(208), UINT8_C(170), UINT8_C( 66), UINT8_C(191), UINT8_C( 79), UINT8_C( 36), UINT8_C(118), UINT8_C(135) } }, + }, + { { UINT8_C(174), UINT8_C( 28), UINT8_C(178), UINT8_C( 16), UINT8_C(150), UINT8_C(176), UINT8_C(228), UINT8_C( 69), + UINT8_C(185), UINT8_C(244), UINT8_C(112), UINT8_C(124), UINT8_C(151), UINT8_C( 71), 
UINT8_C( 45), UINT8_C(183) }, + { { UINT8_C(174), UINT8_C(178), UINT8_C(150), UINT8_C(228), UINT8_C(185), UINT8_C(112), UINT8_C(151), UINT8_C( 45) }, + { UINT8_C( 28), UINT8_C( 16), UINT8_C(176), UINT8_C( 69), UINT8_C(244), UINT8_C(124), UINT8_C( 71), UINT8_C(183) } }, + }, + + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8x2_t r = simde_vld2_u8(test_vec[i].a); + + simde_uint8x8x2_t expected = { + {simde_vld1_u8(test_vec[i].r[0]), simde_vld1_u8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x8x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint8_t buf[16]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint8x8x2_t r = simde_vld2_u8(buf); + + simde_test_arm_neon_write_u8x8x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t a[8]; + uint16_t r[2][4]; + } test_vec[] = { + { { UINT16_C(42664), UINT16_C(53887), UINT16_C(10958), UINT16_C(61123), + UINT16_C( 7800), UINT16_C(15834), UINT16_C(36089), UINT16_C(22799) }, + { { UINT16_C(42664), UINT16_C(10958), UINT16_C( 7800), UINT16_C(36089) }, + { UINT16_C(53887), UINT16_C(61123), UINT16_C(15834), UINT16_C(22799) } }, + }, + { { UINT16_C(62795), UINT16_C(22857), UINT16_C(11632), UINT16_C(39934), + UINT16_C(56742), UINT16_C(64150), UINT16_C( 8095), UINT16_C(18414) }, + { { UINT16_C(62795), UINT16_C(11632), UINT16_C(56742), UINT16_C( 8095) }, + { UINT16_C(22857), UINT16_C(39934), UINT16_C(64150), UINT16_C(18414) } }, + }, + { { UINT16_C(28101), UINT16_C(37914), 
UINT16_C(56727), UINT16_C( 3970), + UINT16_C(23803), UINT16_C(62797), UINT16_C(23784), UINT16_C(13390) }, + { { UINT16_C(28101), UINT16_C(56727), UINT16_C(23803), UINT16_C(23784) }, + { UINT16_C(37914), UINT16_C( 3970), UINT16_C(62797), UINT16_C(13390) } }, + }, + { { UINT16_C(38737), UINT16_C(49549), UINT16_C(36037), UINT16_C(27485), + UINT16_C(62313), UINT16_C( 2149), UINT16_C(21522), UINT16_C(55375) }, + { { UINT16_C(38737), UINT16_C(36037), UINT16_C(62313), UINT16_C(21522) }, + { UINT16_C(49549), UINT16_C(27485), UINT16_C( 2149), UINT16_C(55375) } }, + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4x2_t r = simde_vld2_u16(test_vec[i].a); + + simde_uint16x4x2_t expected = { + {simde_vld1_u16(test_vec[i].r[0]), simde_vld1_u16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 4 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x4x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint16_t buf[8]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint16x4x2_t r = simde_vld2_u16(buf); + + simde_test_arm_neon_write_u16x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t a[4]; + uint32_t r[2][2]; + } test_vec[] = { + { { UINT32_C(1500277185), UINT32_C(1114172999), + UINT32_C( 859288906), UINT32_C(1650951697) }, + { { UINT32_C(1500277185), UINT32_C( 859288906) }, + { UINT32_C(1114172999), UINT32_C(1650951697) } }, + }, + { { UINT32_C(3794072605), UINT32_C(3914236288), + UINT32_C(2280764276), UINT32_C(3378462983) }, + { { UINT32_C(3794072605), 
UINT32_C(2280764276) }, + { UINT32_C(3914236288), UINT32_C(3378462983) } }, + }, + { { UINT32_C(4045589418), UINT32_C( 53775033), + UINT32_C(1362520896), UINT32_C( 263495153) }, + { { UINT32_C(4045589418), UINT32_C(1362520896) }, + { UINT32_C( 53775033), UINT32_C( 263495153) } }, + }, + { { UINT32_C( 317839506), UINT32_C(3455860569), + UINT32_C(4199869939), UINT32_C(3653481262) }, + { { UINT32_C( 317839506), UINT32_C(4199869939) }, + { UINT32_C(3455860569), UINT32_C(3653481262) } }, + }, + { { UINT32_C( 936043902), UINT32_C(2956721776), + UINT32_C(1526821226), UINT32_C(2708124943) }, + { { UINT32_C( 936043902), UINT32_C(1526821226) }, + { UINT32_C(2956721776), UINT32_C(2708124943) } }, + }, + { { UINT32_C(3870514317), UINT32_C(2394206107), + UINT32_C(3414755485), UINT32_C( 983846076) }, + { { UINT32_C(3870514317), UINT32_C(3414755485) }, + { UINT32_C(2394206107), UINT32_C( 983846076) } }, + }, + { { UINT32_C(2725408562), UINT32_C(3612519789), + UINT32_C( 758338334), UINT32_C(2530123017) }, + { { UINT32_C(2725408562), UINT32_C( 758338334) }, + { UINT32_C(3612519789), UINT32_C(2530123017) } }, + }, + { { UINT32_C(2508030713), UINT32_C(3458412849), + UINT32_C(4120554553), UINT32_C( 724582137) }, + { { UINT32_C(2508030713), UINT32_C(4120554553) }, + { UINT32_C(3458412849), UINT32_C( 724582137) } }, + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2x2_t r = simde_vld2_u32(test_vec[i].a); + + simde_uint32x2x2_t expected = { + {simde_vld1_u32(test_vec[i].r[0]), simde_vld1_u32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint32_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint32x2x2_t r = simde_vld2_u32(buf); + + simde_test_arm_neon_write_u32x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_u64 (SIMDE_MUNIT_TEST_ARGS) { /* Runs simde_vld2_u64 on each fixed input a[2] and asserts both result registers equal r[0]/r[1]; the #else branch instead writes freshly generated vectors and returns 1. */ +#if 1 + static const struct { + uint64_t a[2]; + uint64_t r[2][1]; + } test_vec[] = { + { { UINT64_C( 9544671133075875798), + UINT64_C(10026771010818587806) }, + { { UINT64_C( 9544671133075875798) }, + { UINT64_C(10026771010818587806) } }, + }, + { { UINT64_C(11716502022888129015), + UINT64_C( 9207447440231071203) }, + { { UINT64_C(11716502022888129015) }, + { UINT64_C( 9207447440231071203) } }, + }, + { { UINT64_C(14585844370014973971), + UINT64_C(12085455436694909200) }, + { { UINT64_C(14585844370014973971) }, + { UINT64_C(12085455436694909200) } }, + }, + { { UINT64_C( 6976248983748549802), + UINT64_C( 6648178682459053338) }, + { { UINT64_C( 6976248983748549802) }, + { UINT64_C( 6648178682459053338) } }, + }, + { { UINT64_C( 9780321370926949059), + UINT64_C(10409627327296486687) }, + { { UINT64_C( 9780321370926949059) }, + { UINT64_C(10409627327296486687) } }, + }, + { { UINT64_C( 3559687685519800969), + UINT64_C( 9804398810564506218) }, + { { UINT64_C( 3559687685519800969) }, + { UINT64_C( 9804398810564506218) } }, + }, + { { UINT64_C( 2403006863864517466), + UINT64_C( 244715532034575855) }, + { { UINT64_C( 2403006863864517466) }, + { UINT64_C( 244715532034575855) } }, + }, + { { UINT64_C( 1885442199874249441), + UINT64_C(12785682336243916577) }, + { { UINT64_C( 1885442199874249441) }, + { UINT64_C(12785682336243916577) } }, + }, + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1x2_t r = simde_vld2_u64(test_vec[i].a); + + simde_uint64x1x2_t expected = { + {simde_vld1_u64(test_vec[i].r[0]), simde_vld1_u64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u64x1(r.val[0],
expected.val[0]); + simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); + simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); + simde_uint64x1x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint64_t buf[2]; /* vld2_u64 consumes 2 elements (cf. a[2] above) and c.val is only two 1-lane vectors; the former buf[4] made sizeof(buf) exceed c.val, so the memcpy read past c */ + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint64x1x2_t r = simde_vld2_u64(buf); + + simde_test_arm_neon_write_u64x1x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t a[4]; + simde_float32_t r[2][2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -22.58), SIMDE_FLOAT32_C( -87.59), + SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( -68.18) }, + { { SIMDE_FLOAT32_C( -22.58), SIMDE_FLOAT32_C( 1.12) }, + { SIMDE_FLOAT32_C( -87.59), SIMDE_FLOAT32_C( -68.18) } }, + }, + { { SIMDE_FLOAT32_C( -19.64), SIMDE_FLOAT32_C( -79.11), + SIMDE_FLOAT32_C( -13.56), SIMDE_FLOAT32_C( 59.24) }, + { { SIMDE_FLOAT32_C( -19.64), SIMDE_FLOAT32_C( -13.56) }, + { SIMDE_FLOAT32_C( -79.11), SIMDE_FLOAT32_C( 59.24) } }, + }, + { { SIMDE_FLOAT32_C( 84.01), SIMDE_FLOAT32_C( -49.79), + SIMDE_FLOAT32_C( 69.20), SIMDE_FLOAT32_C( 82.05) }, + { { SIMDE_FLOAT32_C( 84.01), SIMDE_FLOAT32_C( 69.20) }, + { SIMDE_FLOAT32_C( -49.79), SIMDE_FLOAT32_C( 82.05) } }, + }, + { { SIMDE_FLOAT32_C( -12.11), SIMDE_FLOAT32_C( 93.29), + SIMDE_FLOAT32_C( 70.29), SIMDE_FLOAT32_C( 70.67) }, + { { SIMDE_FLOAT32_C( -12.11), SIMDE_FLOAT32_C( 70.29) }, + { SIMDE_FLOAT32_C( 93.29), SIMDE_FLOAT32_C( 70.67) } }, + }, + { { SIMDE_FLOAT32_C( 14.62), SIMDE_FLOAT32_C( 51.53), + SIMDE_FLOAT32_C( 11.77), SIMDE_FLOAT32_C( 81.69) }, + { { SIMDE_FLOAT32_C( 14.62), SIMDE_FLOAT32_C( 11.77) }, + { SIMDE_FLOAT32_C( 51.53), SIMDE_FLOAT32_C( 81.69) } }, + }, + { { SIMDE_FLOAT32_C(
-19.28), SIMDE_FLOAT32_C( 59.50), + SIMDE_FLOAT32_C( -77.17), SIMDE_FLOAT32_C( -41.73) }, + { { SIMDE_FLOAT32_C( -19.28), SIMDE_FLOAT32_C( -77.17) }, + { SIMDE_FLOAT32_C( 59.50), SIMDE_FLOAT32_C( -41.73) } }, + }, + { { SIMDE_FLOAT32_C( -85.96), SIMDE_FLOAT32_C( 68.07), + SIMDE_FLOAT32_C( -11.65), SIMDE_FLOAT32_C( 31.99) }, + { { SIMDE_FLOAT32_C( -85.96), SIMDE_FLOAT32_C( -11.65) }, + { SIMDE_FLOAT32_C( 68.07), SIMDE_FLOAT32_C( 31.99) } }, + }, + { { SIMDE_FLOAT32_C( -11.72), SIMDE_FLOAT32_C( -86.96), + SIMDE_FLOAT32_C( 52.84), SIMDE_FLOAT32_C( 65.70) }, + { { SIMDE_FLOAT32_C( -11.72), SIMDE_FLOAT32_C( 52.84) }, + { SIMDE_FLOAT32_C( -86.96), SIMDE_FLOAT32_C( 65.70) } } + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2x2_t r = simde_vld2_f32(test_vec[i].a); + + simde_float32x2x2_t expected = { + {simde_vld1_f32(test_vec[i].r[0]), simde_vld1_f32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], 1); + simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], 1); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-100.0f, 100.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-100.0f, 100.0f); + simde_float32x2x2_t c = {{a, b}}; + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + simde_float32_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_float32x2x2_t r = simde_vld2_f32(buf); + + simde_test_arm_neon_write_f32x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2_f64 (SIMDE_MUNIT_TEST_ARGS) { /* Runs simde_vld2_f64 on each fixed input a[2] and asserts both result registers equal r[0]/r[1]; the #else branch instead writes freshly generated vectors and returns 1. */ +#if 1 + static const struct { + simde_float64_t a[2]; + simde_float64_t r[2][1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -60.52), + SIMDE_FLOAT64_C( -27.97) }, + { { SIMDE_FLOAT64_C( -60.52) }, + { SIMDE_FLOAT64_C( -27.97) } }, + }, + { { SIMDE_FLOAT64_C(
41.23), + SIMDE_FLOAT64_C( -82.15) }, + { { SIMDE_FLOAT64_C( 41.23) }, + { SIMDE_FLOAT64_C( -82.15) } }, + }, + { { SIMDE_FLOAT64_C( -45.22), + SIMDE_FLOAT64_C( -82.20) }, + { { SIMDE_FLOAT64_C( -45.22) }, + { SIMDE_FLOAT64_C( -82.20) } }, + }, + { { SIMDE_FLOAT64_C( 20.47), + SIMDE_FLOAT64_C( -91.57) }, + { { SIMDE_FLOAT64_C( 20.47) }, + { SIMDE_FLOAT64_C( -91.57) } }, + }, + { { SIMDE_FLOAT64_C( 87.79), + SIMDE_FLOAT64_C( 27.03) }, + { { SIMDE_FLOAT64_C( 87.79) }, + { SIMDE_FLOAT64_C( 27.03) } }, + }, + { { SIMDE_FLOAT64_C( -13.17), + SIMDE_FLOAT64_C( 89.71) }, + { { SIMDE_FLOAT64_C( -13.17) }, + { SIMDE_FLOAT64_C( 89.71) } }, + }, + { { SIMDE_FLOAT64_C( -91.04), + SIMDE_FLOAT64_C( 54.16) }, + { { SIMDE_FLOAT64_C( -91.04) }, + { SIMDE_FLOAT64_C( 54.16) } }, + }, + { { SIMDE_FLOAT64_C( 49.63), + SIMDE_FLOAT64_C( 71.75) }, + { { SIMDE_FLOAT64_C( 49.63) }, + { SIMDE_FLOAT64_C( 71.75) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1x2_t r = simde_vld2_f64(test_vec[i].a); + + simde_float64x1x2_t expected = { + {simde_vld1_f64(test_vec[i].r[0]), simde_vld1_f64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], 1); + simde_test_arm_neon_assert_equal_f64x1(r.val[1], expected.val[1], 1); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-100.0, 100.0); + simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-100.0, 100.0); + simde_float64x1x2_t c = {{a, b}}; + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + simde_float64_t buf[2]; /* vld2_f64 consumes 2 elements (cf. a[2] above) and c.val is only two 1-lane vectors; the former buf[4] made sizeof(buf) exceed c.val, so the memcpy read past c */ + simde_memcpy(buf, c.val, sizeof(buf)); + simde_float64x1x2_t r = simde_vld2_f64(buf); + + simde_test_arm_neon_write_f64x1x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int8_t a[32];
+ int8_t r[2][16]; + } test_vec[] = { + { { -INT8_C( 25), INT8_C( 77), INT8_C( 76), INT8_C( 77), INT8_C( 68), INT8_C( 84), INT8_C( 81), INT8_C( 59), + INT8_C( 69), INT8_C( 8), -INT8_C( 108), -INT8_C( 45), -INT8_C( 85), INT8_C( 73), -INT8_C( 110), INT8_C( 69), + -INT8_C( 13), INT8_C( 23), -INT8_C( 80), -INT8_C( 93), -INT8_C( 102), INT8_C( 80), -INT8_C( 63), INT8_C( 67), + INT8_C( 11), -INT8_C( 75), INT8_C( 9), INT8_C( 98), INT8_C( 19), INT8_C( 38), INT8_C( 41), -INT8_C( 6) }, + { { -INT8_C( 25), INT8_C( 76), INT8_C( 68), INT8_C( 81), INT8_C( 69), -INT8_C( 108), -INT8_C( 85), -INT8_C( 110), + -INT8_C( 13), -INT8_C( 80), -INT8_C( 102), -INT8_C( 63), INT8_C( 11), INT8_C( 9), INT8_C( 19), INT8_C( 41) }, + { INT8_C( 77), INT8_C( 77), INT8_C( 84), INT8_C( 59), INT8_C( 8), -INT8_C( 45), INT8_C( 73), INT8_C( 69), + INT8_C( 23), -INT8_C( 93), INT8_C( 80), INT8_C( 67), -INT8_C( 75), INT8_C( 98), INT8_C( 38), -INT8_C( 6) } }, + }, + { { INT8_C( 116), INT8_C( 117), INT8_C( 71), -INT8_C( 72), -INT8_C( 55), -INT8_C( 104), -INT8_C( 13), INT8_C( 15), + -INT8_C( 96), -INT8_C( 121), -INT8_C( 30), INT8_C( 76), -INT8_C( 48), INT8_C( 116), -INT8_C( 111), -INT8_C( 60), + -INT8_C( 117), INT8_C( 65), INT8_C( 103), INT8_C( 38), -INT8_C( 111), INT8_C( 40), INT8_C( 105), -INT8_C( 100), + -INT8_C( 35), INT8_C( 115), -INT8_C( 2), -INT8_C( 15), -INT8_C( 103), INT8_C( 39), -INT8_C( 21), INT8_C( 13) }, + { { INT8_C( 116), INT8_C( 71), -INT8_C( 55), -INT8_C( 13), -INT8_C( 96), -INT8_C( 30), -INT8_C( 48), -INT8_C( 111), + -INT8_C( 117), INT8_C( 103), -INT8_C( 111), INT8_C( 105), -INT8_C( 35), -INT8_C( 2), -INT8_C( 103), -INT8_C( 21) }, + { INT8_C( 117), -INT8_C( 72), -INT8_C( 104), INT8_C( 15), -INT8_C( 121), INT8_C( 76), INT8_C( 116), -INT8_C( 60), + INT8_C( 65), INT8_C( 38), INT8_C( 40), -INT8_C( 100), INT8_C( 115), -INT8_C( 15), INT8_C( 39), INT8_C( 13) } }, + }, + { { -INT8_C( 100), INT8_C( 51), -INT8_C( 59), INT8_C( 102), -INT8_C( 53), -INT8_C( 71), INT8_C( 117), INT8_C( 108), + INT8_C( 64), 
INT8_C( 87), -INT8_C( 72), INT8_C( 17), -INT8_C( 52), INT8_C( 73), -INT8_C( 43), INT8_C( 87), + -INT8_C( 117), INT8_C( 60), INT8_C( 125), INT8_C( 28), INT8_C( 101), -INT8_C( 25), -INT8_C( 71), INT8_C( 66), + INT8_C( 90), -INT8_C( 73), INT8_C( 51), -INT8_C( 13), -INT8_C( 33), INT8_C( 31), INT8_C( 1), INT8_C( 123) }, + { { -INT8_C( 100), -INT8_C( 59), -INT8_C( 53), INT8_C( 117), INT8_C( 64), -INT8_C( 72), -INT8_C( 52), -INT8_C( 43), + -INT8_C( 117), INT8_C( 125), INT8_C( 101), -INT8_C( 71), INT8_C( 90), INT8_C( 51), -INT8_C( 33), INT8_C( 1) }, + { INT8_C( 51), INT8_C( 102), -INT8_C( 71), INT8_C( 108), INT8_C( 87), INT8_C( 17), INT8_C( 73), INT8_C( 87), + INT8_C( 60), INT8_C( 28), -INT8_C( 25), INT8_C( 66), -INT8_C( 73), -INT8_C( 13), INT8_C( 31), INT8_C( 123) } }, + }, + { { INT8_C( 82), -INT8_C( 58), -INT8_C( 31), INT8_C( 29), INT8_MAX, INT8_C( 86), -INT8_C( 119), -INT8_C( 64), + -INT8_C( 82), INT8_C( 65), -INT8_C( 47), INT8_C( 122), -INT8_C( 117), -INT8_C( 90), -INT8_C( 47), INT8_C( 22), + -INT8_C( 30), INT8_C( 79), INT8_C( 50), INT8_C( 71), INT8_C( 54), -INT8_C( 21), -INT8_C( 118), -INT8_C( 112), + -INT8_C( 93), -INT8_C( 67), -INT8_C( 125), -INT8_C( 126), -INT8_C( 36), -INT8_C( 124), -INT8_C( 3), INT8_C( 46) }, + { { INT8_C( 82), -INT8_C( 31), INT8_MAX, -INT8_C( 119), -INT8_C( 82), -INT8_C( 47), -INT8_C( 117), -INT8_C( 47), + -INT8_C( 30), INT8_C( 50), INT8_C( 54), -INT8_C( 118), -INT8_C( 93), -INT8_C( 125), -INT8_C( 36), -INT8_C( 3) }, + { -INT8_C( 58), INT8_C( 29), INT8_C( 86), -INT8_C( 64), INT8_C( 65), INT8_C( 122), -INT8_C( 90), INT8_C( 22), + INT8_C( 79), INT8_C( 71), -INT8_C( 21), -INT8_C( 112), -INT8_C( 67), -INT8_C( 126), -INT8_C( 124), INT8_C( 46) } }, + }, + { { INT8_C( 75), -INT8_C( 33), INT8_C( 76), -INT8_C( 54), INT8_C( 53), -INT8_C( 43), -INT8_C( 118), -INT8_C( 29), + INT8_C( 23), INT8_C( 91), INT8_C( 93), -INT8_C( 94), INT8_C( 1), INT8_C( 47), -INT8_C( 72), -INT8_C( 28), + INT8_C( 126), -INT8_C( 22), INT8_C( 43), -INT8_C( 76), -INT8_C( 42), 
-INT8_C( 75), INT8_C( 68), INT8_C( 121), + INT8_C( 115), -INT8_C( 57), -INT8_C( 5), INT8_C( 79), INT8_C( 76), -INT8_C( 8), INT8_C( 126), -INT8_C( 105) }, + { { INT8_C( 75), INT8_C( 76), INT8_C( 53), -INT8_C( 118), INT8_C( 23), INT8_C( 93), INT8_C( 1), -INT8_C( 72), + INT8_C( 126), INT8_C( 43), -INT8_C( 42), INT8_C( 68), INT8_C( 115), -INT8_C( 5), INT8_C( 76), INT8_C( 126) }, + { -INT8_C( 33), -INT8_C( 54), -INT8_C( 43), -INT8_C( 29), INT8_C( 91), -INT8_C( 94), INT8_C( 47), -INT8_C( 28), + -INT8_C( 22), -INT8_C( 76), -INT8_C( 75), INT8_C( 121), -INT8_C( 57), INT8_C( 79), -INT8_C( 8), -INT8_C( 105) } }, + }, + { { -INT8_C( 41), -INT8_C( 54), INT8_C( 97), INT8_C( 13), -INT8_C( 97), -INT8_C( 20), -INT8_C( 16), -INT8_C( 74), + INT8_C( 71), INT8_C( 78), INT8_C( 88), INT8_C( 73), INT8_C( 125), INT8_C( 16), INT8_C( 45), -INT8_C( 5), + -INT8_C( 5), INT8_C( 88), -INT8_C( 81), -INT8_C( 47), INT8_C( 14), -INT8_C( 13), INT8_C( 74), -INT8_C( 127), + -INT8_C( 70), INT8_C( 69), -INT8_C( 48), INT8_C( 6), INT8_C( 61), INT8_C( 78), -INT8_C( 99), INT8_C( 21) }, + { { -INT8_C( 41), INT8_C( 97), -INT8_C( 97), -INT8_C( 16), INT8_C( 71), INT8_C( 88), INT8_C( 125), INT8_C( 45), + -INT8_C( 5), -INT8_C( 81), INT8_C( 14), INT8_C( 74), -INT8_C( 70), -INT8_C( 48), INT8_C( 61), -INT8_C( 99) }, + { -INT8_C( 54), INT8_C( 13), -INT8_C( 20), -INT8_C( 74), INT8_C( 78), INT8_C( 73), INT8_C( 16), -INT8_C( 5), + INT8_C( 88), -INT8_C( 47), -INT8_C( 13), -INT8_C( 127), INT8_C( 69), INT8_C( 6), INT8_C( 78), INT8_C( 21) } }, + }, + { { INT8_C( 24), -INT8_C( 1), INT8_C( 34), -INT8_C( 72), -INT8_C( 21), INT8_C( 18), INT8_C( 110), INT8_C( 50), + INT8_C( 96), -INT8_C( 57), INT8_C( 123), -INT8_C( 35), -INT8_C( 41), -INT8_C( 88), -INT8_C( 40), -INT8_C( 46), + INT8_C( 1), -INT8_C( 121), -INT8_C( 93), INT8_C( 15), INT8_C( 122), -INT8_C( 19), -INT8_C( 112), INT8_C( 53), + INT8_C( 50), INT8_C( 96), INT8_C( 59), INT8_C( 112), -INT8_C( 81), -INT8_C( 39), -INT8_C( 123), -INT8_C( 57) }, + { { INT8_C( 24), INT8_C( 34), 
-INT8_C( 21), INT8_C( 110), INT8_C( 96), INT8_C( 123), -INT8_C( 41), -INT8_C( 40), + INT8_C( 1), -INT8_C( 93), INT8_C( 122), -INT8_C( 112), INT8_C( 50), INT8_C( 59), -INT8_C( 81), -INT8_C( 123) }, + { -INT8_C( 1), -INT8_C( 72), INT8_C( 18), INT8_C( 50), -INT8_C( 57), -INT8_C( 35), -INT8_C( 88), -INT8_C( 46), + -INT8_C( 121), INT8_C( 15), -INT8_C( 19), INT8_C( 53), INT8_C( 96), INT8_C( 112), -INT8_C( 39), -INT8_C( 57) } }, + }, + { { -INT8_C( 40), -INT8_C( 89), INT8_MAX, -INT8_C( 61), -INT8_C( 71), -INT8_C( 18), -INT8_C( 11), INT8_C( 26), + -INT8_C( 75), INT8_C( 113), -INT8_C( 9), -INT8_C( 116), INT8_C( 25), -INT8_C( 48), INT8_C( 95), INT8_C( 26), + INT8_C( 87), INT8_C( 2), INT8_C( 41), -INT8_C( 46), -INT8_C( 16), -INT8_C( 71), INT8_C( 7), INT8_C( 34), + INT8_C( 26), INT8_C( 66), -INT8_C( 110), -INT8_C( 55), INT8_C( 27), INT8_C( 23), -INT8_C( 112), -INT8_C( 13) }, + { { -INT8_C( 40), INT8_MAX, -INT8_C( 71), -INT8_C( 11), -INT8_C( 75), -INT8_C( 9), INT8_C( 25), INT8_C( 95), + INT8_C( 87), INT8_C( 41), -INT8_C( 16), INT8_C( 7), INT8_C( 26), -INT8_C( 110), INT8_C( 27), -INT8_C( 112) }, + { -INT8_C( 89), -INT8_C( 61), -INT8_C( 18), INT8_C( 26), INT8_C( 113), -INT8_C( 116), -INT8_C( 48), INT8_C( 26), + INT8_C( 2), -INT8_C( 46), -INT8_C( 71), INT8_C( 34), INT8_C( 66), -INT8_C( 55), INT8_C( 23), -INT8_C( 13) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16x2_t r = simde_vld2q_s8(test_vec[i].a); + + simde_int8x16x2_t expected = { + {simde_vld1q_s8(test_vec[i].r[0]), simde_vld1q_s8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x16x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int8_t buf[32]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_int8x16x2_t r = simde_vld2q_s8(buf); + + simde_test_arm_neon_write_i8x16x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t a[16]; + int16_t r[2][8]; + } test_vec[] = { + { { INT16_C( 11850), INT16_C( 12679), INT16_C( 1668), -INT16_C( 2829), INT16_C( 2417), INT16_C( 6613), -INT16_C( 26382), -INT16_C( 6963), + INT16_C( 11746), -INT16_C( 19992), INT16_C( 261), -INT16_C( 26272), -INT16_C( 27245), INT16_C( 20274), -INT16_C( 24440), -INT16_C( 11702) }, + { { INT16_C( 11850), INT16_C( 1668), INT16_C( 2417), -INT16_C( 26382), INT16_C( 11746), INT16_C( 261), -INT16_C( 27245), -INT16_C( 24440) }, + { INT16_C( 12679), -INT16_C( 2829), INT16_C( 6613), -INT16_C( 6963), -INT16_C( 19992), -INT16_C( 26272), INT16_C( 20274), -INT16_C( 11702) } }, + }, + { { -INT16_C( 11826), INT16_C( 21252), -INT16_C( 2089), INT16_C( 18503), INT16_C( 7168), -INT16_C( 3231), INT16_C( 11956), -INT16_C( 26921), + -INT16_C( 16548), INT16_C( 24903), -INT16_C( 22335), INT16_C( 21754), INT16_C( 11325), -INT16_C( 14941), -INT16_C( 4659), -INT16_C( 25704) }, + { { -INT16_C( 11826), -INT16_C( 2089), INT16_C( 7168), INT16_C( 11956), -INT16_C( 16548), -INT16_C( 22335), INT16_C( 11325), -INT16_C( 4659) }, + { INT16_C( 21252), INT16_C( 18503), -INT16_C( 3231), -INT16_C( 26921), INT16_C( 24903), INT16_C( 21754), -INT16_C( 14941), -INT16_C( 25704) } }, + }, + { { -INT16_C( 25410), -INT16_C( 27154), INT16_C( 13715), -INT16_C( 27427), INT16_C( 15953), INT16_C( 1415), INT16_C( 24172), -INT16_C( 14181), + -INT16_C( 7395), -INT16_C( 8663), INT16_C( 9355), -INT16_C( 14286), -INT16_C( 10928), INT16_C( 7566), INT16_C( 9922), -INT16_C( 32583) }, + { { -INT16_C( 25410), INT16_C( 13715), INT16_C( 15953), INT16_C( 24172), -INT16_C( 7395), INT16_C( 9355), -INT16_C( 10928), INT16_C( 
9922) }, + { -INT16_C( 27154), -INT16_C( 27427), INT16_C( 1415), -INT16_C( 14181), -INT16_C( 8663), -INT16_C( 14286), INT16_C( 7566), -INT16_C( 32583) } }, + }, + { { -INT16_C( 22590), INT16_C( 21781), -INT16_C( 3363), INT16_C( 12009), INT16_C( 28720), -INT16_C( 25292), -INT16_C( 12338), -INT16_C( 5019), + -INT16_C( 28750), INT16_C( 15818), -INT16_C( 589), INT16_C( 774), -INT16_C( 27438), -INT16_C( 27359), -INT16_C( 9542), INT16_C( 31765) }, + { { -INT16_C( 22590), -INT16_C( 3363), INT16_C( 28720), -INT16_C( 12338), -INT16_C( 28750), -INT16_C( 589), -INT16_C( 27438), -INT16_C( 9542) }, + { INT16_C( 21781), INT16_C( 12009), -INT16_C( 25292), -INT16_C( 5019), INT16_C( 15818), INT16_C( 774), -INT16_C( 27359), INT16_C( 31765) } }, + }, + { { INT16_C( 11137), INT16_C( 24273), -INT16_C( 17635), INT16_C( 20109), -INT16_C( 16085), -INT16_C( 1301), INT16_C( 20624), INT16_C( 17382), + -INT16_C( 20257), -INT16_C( 28032), -INT16_C( 31059), -INT16_C( 32618), -INT16_C( 18662), -INT16_C( 11243), INT16_C( 10897), INT16_C( 4688) }, + { { INT16_C( 11137), -INT16_C( 17635), -INT16_C( 16085), INT16_C( 20624), -INT16_C( 20257), -INT16_C( 31059), -INT16_C( 18662), INT16_C( 10897) }, + { INT16_C( 24273), INT16_C( 20109), -INT16_C( 1301), INT16_C( 17382), -INT16_C( 28032), -INT16_C( 32618), -INT16_C( 11243), INT16_C( 4688) } }, + }, + { { INT16_C( 8789), INT16_C( 29553), -INT16_C( 291), INT16_C( 2241), -INT16_C( 21313), INT16_C( 20226), -INT16_C( 5892), -INT16_C( 9070), + INT16_C( 5017), INT16_C( 18030), INT16_C( 1177), -INT16_C( 19258), -INT16_C( 9285), INT16_C( 19592), -INT16_C( 9978), INT16_C( 23391) }, + { { INT16_C( 8789), -INT16_C( 291), -INT16_C( 21313), -INT16_C( 5892), INT16_C( 5017), INT16_C( 1177), -INT16_C( 9285), -INT16_C( 9978) }, + { INT16_C( 29553), INT16_C( 2241), INT16_C( 20226), -INT16_C( 9070), INT16_C( 18030), -INT16_C( 19258), INT16_C( 19592), INT16_C( 23391) } }, + }, + { { -INT16_C( 12037), -INT16_C( 10034), -INT16_C( 28722), -INT16_C( 29216), -INT16_C( 7365), 
INT16_C( 14556), INT16_C( 28619), INT16_C( 25620), + -INT16_C( 32126), INT16_C( 7083), INT16_C( 29063), INT16_C( 17103), INT16_C( 22605), INT16_C( 21391), -INT16_C( 4559), INT16_C( 11438) }, + { { -INT16_C( 12037), -INT16_C( 28722), -INT16_C( 7365), INT16_C( 28619), -INT16_C( 32126), INT16_C( 29063), INT16_C( 22605), -INT16_C( 4559) }, + { -INT16_C( 10034), -INT16_C( 29216), INT16_C( 14556), INT16_C( 25620), INT16_C( 7083), INT16_C( 17103), INT16_C( 21391), INT16_C( 11438) } }, + }, + { { INT16_C( 32190), -INT16_C( 29692), -INT16_C( 7156), INT16_C( 18457), -INT16_C( 2617), -INT16_C( 27776), -INT16_C( 27548), -INT16_C( 6409), + -INT16_C( 24042), -INT16_C( 25342), -INT16_C( 12012), INT16_C( 25056), INT16_C( 28457), INT16_C( 23220), INT16_C( 25181), INT16_C( 7046) }, + { { INT16_C( 32190), -INT16_C( 7156), -INT16_C( 2617), -INT16_C( 27548), -INT16_C( 24042), -INT16_C( 12012), INT16_C( 28457), INT16_C( 25181) }, + { -INT16_C( 29692), INT16_C( 18457), -INT16_C( 27776), -INT16_C( 6409), -INT16_C( 25342), INT16_C( 25056), INT16_C( 23220), INT16_C( 7046) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8x2_t r = simde_vld2q_s16(test_vec[i].a); + + simde_int16x8x2_t expected = { + {simde_vld1q_s16(test_vec[i].r[0]), simde_vld1q_s16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x8x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int16_t buf[16]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_int16x8x2_t r = simde_vld2q_s16(buf); + + simde_test_arm_neon_write_i16x8x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 
1; +#endif +} + +static int +test_simde_vld2q_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a[8]; + int32_t r[2][4]; + } test_vec[] = { + { { -INT32_C( 1652910308), INT32_C( 1276922200), -INT32_C( 1246624074), INT32_C( 345995066), + -INT32_C( 1909421954), INT32_C( 1484737180), -INT32_C( 1927907536), INT32_C( 1716163914) }, + { { -INT32_C( 1652910308), -INT32_C( 1246624074), -INT32_C( 1909421954), -INT32_C( 1927907536) }, + { INT32_C( 1276922200), INT32_C( 345995066), INT32_C( 1484737180), INT32_C( 1716163914) } }, + }, + { { -INT32_C( 2063350484), -INT32_C( 992927986), INT32_C( 1601798949), INT32_C( 2037651963), + INT32_C( 1023976609), INT32_C( 513116142), INT32_C( 1236052991), INT32_C( 1840248385) }, + { { -INT32_C( 2063350484), INT32_C( 1601798949), INT32_C( 1023976609), INT32_C( 1236052991) }, + { -INT32_C( 992927986), INT32_C( 2037651963), INT32_C( 513116142), INT32_C( 1840248385) } }, + }, + { { -INT32_C( 890064197), -INT32_C( 141638702), INT32_C( 1113000007), -INT32_C( 1027880159), + INT32_C( 1593820015), INT32_C( 1249678667), -INT32_C( 2121062336), -INT32_C( 621854177) }, + { { -INT32_C( 890064197), INT32_C( 1113000007), INT32_C( 1593820015), -INT32_C( 2121062336) }, + { -INT32_C( 141638702), -INT32_C( 1027880159), INT32_C( 1249678667), -INT32_C( 621854177) } }, + }, + { { -INT32_C( 962272780), -INT32_C( 323079259), INT32_C( 1546523963), INT32_C( 1327426016), + -INT32_C( 122872403), -INT32_C( 213767502), INT32_C( 1903482194), INT32_C( 189555479) }, + { { -INT32_C( 962272780), INT32_C( 1546523963), -INT32_C( 122872403), INT32_C( 1903482194) }, + { -INT32_C( 323079259), INT32_C( 1327426016), -INT32_C( 213767502), INT32_C( 189555479) } }, + }, + { { -INT32_C( 355274683), INT32_C( 1591119907), -INT32_C( 2051406683), -INT32_C( 1697326867), + -INT32_C( 1466793226), -INT32_C( 23341908), -INT32_C( 1066397527), -INT32_C( 1194541965) }, + { { -INT32_C( 355274683), -INT32_C( 2051406683), -INT32_C( 1466793226), -INT32_C( 1066397527) }, + { 
INT32_C( 1591119907), -INT32_C( 1697326867), -INT32_C( 23341908), -INT32_C( 1194541965) } }, + }, + { { -INT32_C( 794648916), -INT32_C( 751929298), INT32_C( 1784211836), -INT32_C( 1224463167), + INT32_C( 1516279726), INT32_C( 358218603), INT32_C( 2144717067), INT32_C( 825729413) }, + { { -INT32_C( 794648916), INT32_C( 1784211836), INT32_C( 1516279726), INT32_C( 2144717067) }, + { -INT32_C( 751929298), -INT32_C( 1224463167), INT32_C( 358218603), INT32_C( 825729413) } }, + }, + { { INT32_C( 1828837951), -INT32_C( 817876910), -INT32_C( 633759719), INT32_C( 1938963909), + INT32_C( 1087304404), -INT32_C( 111859731), INT32_C( 1970809584), INT32_C( 195473356) }, + { { INT32_C( 1828837951), -INT32_C( 633759719), INT32_C( 1087304404), INT32_C( 1970809584) }, + { -INT32_C( 817876910), INT32_C( 1938963909), -INT32_C( 111859731), INT32_C( 195473356) } }, + }, + { { -INT32_C( 596006775), -INT32_C( 240404008), INT32_C( 399238226), -INT32_C( 158704351), + INT32_C( 1026971727), INT32_C( 1865845631), -INT32_C( 2115719499), -INT32_C( 410154147) }, + { { -INT32_C( 596006775), INT32_C( 399238226), INT32_C( 1026971727), -INT32_C( 2115719499) }, + { -INT32_C( 240404008), -INT32_C( 158704351), INT32_C( 1865845631), -INT32_C( 410154147) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4x2_t r = simde_vld2q_s32(test_vec[i].a); + + simde_int32x4x2_t expected = { + {simde_vld1q_s32(test_vec[i].r[0]), simde_vld1q_s32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x4x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int32_t buf[8]; + 
simde_memcpy(buf, c.val, sizeof(buf)); + simde_int32x4x2_t r = simde_vld2q_s32(buf); + + simde_test_arm_neon_write_i32x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int64_t a[4]; + int64_t r[2][2]; + } test_vec[] = { + { { INT64_C( 761675503690497765), -INT64_C( 3317868187565492127), + INT64_C( 6422292174704708047), -INT64_C( 4276746329755678056) }, + { { INT64_C( 761675503690497765), INT64_C( 6422292174704708047) }, + { -INT64_C( 3317868187565492127), -INT64_C( 4276746329755678056) } }, + }, + { { -INT64_C( 6949583921125508177), -INT64_C( 8908427146440727274), + -INT64_C( 7890751878801495695), -INT64_C( 7775319739955481739) }, + { { -INT64_C( 6949583921125508177), -INT64_C( 7890751878801495695) }, + { -INT64_C( 8908427146440727274), -INT64_C( 7775319739955481739) } }, + }, + { { -INT64_C( 2838209209255287618), -INT64_C( 1655702365654380340), + INT64_C( 3438371571634163500), -INT64_C( 2812327433930041729) }, + { { -INT64_C( 2838209209255287618), INT64_C( 3438371571634163500) }, + { -INT64_C( 1655702365654380340), -INT64_C( 2812327433930041729) } }, + }, + { { -INT64_C( 8945432303512580032), INT64_C( 3414021002538270720), + INT64_C( 8881675687628664665), -INT64_C( 6803857797262883422) }, + { { -INT64_C( 8945432303512580032), INT64_C( 8881675687628664665) }, + { INT64_C( 3414021002538270720), -INT64_C( 6803857797262883422) } }, + }, + { { -INT64_C( 2800536549414704921), -INT64_C( 8435045075786848322), + -INT64_C( 893436412045671134), INT64_C( 3938470781334223014) }, + { { -INT64_C( 2800536549414704921), -INT64_C( 893436412045671134) }, + { -INT64_C( 8435045075786848322), INT64_C( 3938470781334223014) } }, + }, + { { -INT64_C( 8621023513008583766), -INT64_C( 9017831118517654241), + -INT64_C( 17106915664299905), INT64_C( 6789087051402080945) }, + { { -INT64_C( 8621023513008583766), -INT64_C( 17106915664299905) }, + { -INT64_C( 9017831118517654241), INT64_C( 
6789087051402080945) } }, + }, + { { INT64_C( 2543473880721263865), INT64_C( 5821833599798974185), + INT64_C( 6277706328444005574), -INT64_C( 3717564459626585002) }, + { { INT64_C( 2543473880721263865), INT64_C( 6277706328444005574) }, + { INT64_C( 5821833599798974185), -INT64_C( 3717564459626585002) } }, + }, + { { INT64_C( 7909406733251856539), INT64_C( 5445558404010882673), + -INT64_C( 1375135456396922130), INT64_C( 7731658909898130093) }, + { { INT64_C( 7909406733251856539), -INT64_C( 1375135456396922130) }, + { INT64_C( 5445558404010882673), INT64_C( 7731658909898130093) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2x2_t r = simde_vld2q_s64(test_vec[i].a); + + simde_int64x2x2_t expected = { + {simde_vld1q_s64(test_vec[i].r[0]), simde_vld1q_s64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int64x2x2_t c = {{a, b}}; + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + int64_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_int64x2x2_t r = simde_vld2q_s64(buf); + + simde_test_arm_neon_write_i64x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t a[32]; + uint8_t r[2][16]; + } test_vec[] = { + { { UINT8_C( 56), UINT8_C(119), UINT8_C(167), UINT8_C( 99), UINT8_C(221), UINT8_C(126), UINT8_C(139), UINT8_C( 94), + UINT8_C( 0), UINT8_C(203), UINT8_C(216), UINT8_C( 77), UINT8_C( 79), UINT8_C(152), UINT8_C( 2), UINT8_C(187), + UINT8_C(173), UINT8_C(251), UINT8_C( 63), UINT8_C( 57), UINT8_C(133), UINT8_C( 35), 
UINT8_C(243), UINT8_C(166), + UINT8_C( 51), UINT8_C(149), UINT8_C(128), UINT8_C( 99), UINT8_C(176), UINT8_C(195), UINT8_C( 30), UINT8_C(232) }, + { { UINT8_C( 56), UINT8_C(167), UINT8_C(221), UINT8_C(139), UINT8_C( 0), UINT8_C(216), UINT8_C( 79), UINT8_C( 2), + UINT8_C(173), UINT8_C( 63), UINT8_C(133), UINT8_C(243), UINT8_C( 51), UINT8_C(128), UINT8_C(176), UINT8_C( 30) }, + { UINT8_C(119), UINT8_C( 99), UINT8_C(126), UINT8_C( 94), UINT8_C(203), UINT8_C( 77), UINT8_C(152), UINT8_C(187), + UINT8_C(251), UINT8_C( 57), UINT8_C( 35), UINT8_C(166), UINT8_C(149), UINT8_C( 99), UINT8_C(195), UINT8_C(232) } }, + }, + { { UINT8_C( 58), UINT8_C(198), UINT8_C( 75), UINT8_C( 24), UINT8_C( 68), UINT8_C(214), UINT8_C(118), UINT8_C( 68), + UINT8_C(161), UINT8_C( 78), UINT8_C(145), UINT8_C(240), UINT8_C(231), UINT8_C(148), UINT8_C(172), UINT8_C(148), + UINT8_C(143), UINT8_C(235), UINT8_C(205), UINT8_C( 20), UINT8_C( 14), UINT8_C(192), UINT8_C(186), UINT8_C( 65), + UINT8_C( 85), UINT8_C( 58), UINT8_C(164), UINT8_C( 5), UINT8_C(254), UINT8_C(195), UINT8_C(237), UINT8_C( 56) }, + { { UINT8_C( 58), UINT8_C( 75), UINT8_C( 68), UINT8_C(118), UINT8_C(161), UINT8_C(145), UINT8_C(231), UINT8_C(172), + UINT8_C(143), UINT8_C(205), UINT8_C( 14), UINT8_C(186), UINT8_C( 85), UINT8_C(164), UINT8_C(254), UINT8_C(237) }, + { UINT8_C(198), UINT8_C( 24), UINT8_C(214), UINT8_C( 68), UINT8_C( 78), UINT8_C(240), UINT8_C(148), UINT8_C(148), + UINT8_C(235), UINT8_C( 20), UINT8_C(192), UINT8_C( 65), UINT8_C( 58), UINT8_C( 5), UINT8_C(195), UINT8_C( 56) } }, + }, + { { UINT8_C(137), UINT8_C( 56), UINT8_C( 80), UINT8_C(205), UINT8_C( 15), UINT8_C(199), UINT8_C( 18), UINT8_C(176), + UINT8_C( 21), UINT8_C(163), UINT8_C(161), UINT8_C(252), UINT8_C( 55), UINT8_C( 77), UINT8_C(144), UINT8_C(198), + UINT8_C( 56), UINT8_C( 93), UINT8_C(219), UINT8_C( 70), UINT8_C( 29), UINT8_C(149), UINT8_C(135), UINT8_C(115), + UINT8_C(208), UINT8_C( 43), UINT8_C(120), UINT8_C(206), UINT8_C(238), UINT8_C(102), UINT8_C( 6), 
UINT8_C(119) }, + { { UINT8_C(137), UINT8_C( 80), UINT8_C( 15), UINT8_C( 18), UINT8_C( 21), UINT8_C(161), UINT8_C( 55), UINT8_C(144), + UINT8_C( 56), UINT8_C(219), UINT8_C( 29), UINT8_C(135), UINT8_C(208), UINT8_C(120), UINT8_C(238), UINT8_C( 6) }, + { UINT8_C( 56), UINT8_C(205), UINT8_C(199), UINT8_C(176), UINT8_C(163), UINT8_C(252), UINT8_C( 77), UINT8_C(198), + UINT8_C( 93), UINT8_C( 70), UINT8_C(149), UINT8_C(115), UINT8_C( 43), UINT8_C(206), UINT8_C(102), UINT8_C(119) } }, + }, + { { UINT8_C(158), UINT8_C( 87), UINT8_C( 69), UINT8_C(173), UINT8_C( 30), UINT8_C( 87), UINT8_C( 94), UINT8_C( 51), + UINT8_C(250), UINT8_MAX, UINT8_C( 48), UINT8_C( 50), UINT8_C( 76), UINT8_C(192), UINT8_C(248), UINT8_C(132), + UINT8_C( 30), UINT8_C(211), UINT8_C(202), UINT8_C( 59), UINT8_C(105), UINT8_C( 81), UINT8_C(174), UINT8_C( 57), + UINT8_C(124), UINT8_C( 39), UINT8_C( 7), UINT8_C(107), UINT8_C(141), UINT8_C( 13), UINT8_C(226), UINT8_C( 43) }, + { { UINT8_C(158), UINT8_C( 69), UINT8_C( 30), UINT8_C( 94), UINT8_C(250), UINT8_C( 48), UINT8_C( 76), UINT8_C(248), + UINT8_C( 30), UINT8_C(202), UINT8_C(105), UINT8_C(174), UINT8_C(124), UINT8_C( 7), UINT8_C(141), UINT8_C(226) }, + { UINT8_C( 87), UINT8_C(173), UINT8_C( 87), UINT8_C( 51), UINT8_MAX, UINT8_C( 50), UINT8_C(192), UINT8_C(132), + UINT8_C(211), UINT8_C( 59), UINT8_C( 81), UINT8_C( 57), UINT8_C( 39), UINT8_C(107), UINT8_C( 13), UINT8_C( 43) } }, + }, + { { UINT8_C(100), UINT8_C( 39), UINT8_C(217), UINT8_C(130), UINT8_C(126), UINT8_C( 55), UINT8_C(182), UINT8_C(121), + UINT8_C( 54), UINT8_C(230), UINT8_C(171), UINT8_C(130), UINT8_C(166), UINT8_C(163), UINT8_C( 6), UINT8_C(196), + UINT8_C(119), UINT8_C(208), UINT8_C( 0), UINT8_C(224), UINT8_C( 33), UINT8_C(174), UINT8_C( 25), UINT8_C(157), + UINT8_C(213), UINT8_C( 32), UINT8_C( 8), UINT8_C( 98), UINT8_C( 45), UINT8_C(235), UINT8_C(142), UINT8_C(146) }, + { { UINT8_C(100), UINT8_C(217), UINT8_C(126), UINT8_C(182), UINT8_C( 54), UINT8_C(171), UINT8_C(166), UINT8_C( 6), + 
UINT8_C(119), UINT8_C( 0), UINT8_C( 33), UINT8_C( 25), UINT8_C(213), UINT8_C( 8), UINT8_C( 45), UINT8_C(142) }, + { UINT8_C( 39), UINT8_C(130), UINT8_C( 55), UINT8_C(121), UINT8_C(230), UINT8_C(130), UINT8_C(163), UINT8_C(196), + UINT8_C(208), UINT8_C(224), UINT8_C(174), UINT8_C(157), UINT8_C( 32), UINT8_C( 98), UINT8_C(235), UINT8_C(146) } }, + }, + { { UINT8_C( 18), UINT8_C(103), UINT8_C( 20), UINT8_C(145), UINT8_C(158), UINT8_C(202), UINT8_C( 10), UINT8_C(212), + UINT8_C(176), UINT8_C(181), UINT8_C( 86), UINT8_C( 87), UINT8_C( 88), UINT8_C( 92), UINT8_C( 27), UINT8_C(207), + UINT8_C( 44), UINT8_C( 27), UINT8_C(175), UINT8_C( 77), UINT8_C(202), UINT8_C(200), UINT8_C(234), UINT8_C(159), + UINT8_C(232), UINT8_C(243), UINT8_C( 2), UINT8_C( 22), UINT8_C(222), UINT8_C(144), UINT8_C(168), UINT8_C(240) }, + { { UINT8_C( 18), UINT8_C( 20), UINT8_C(158), UINT8_C( 10), UINT8_C(176), UINT8_C( 86), UINT8_C( 88), UINT8_C( 27), + UINT8_C( 44), UINT8_C(175), UINT8_C(202), UINT8_C(234), UINT8_C(232), UINT8_C( 2), UINT8_C(222), UINT8_C(168) }, + { UINT8_C(103), UINT8_C(145), UINT8_C(202), UINT8_C(212), UINT8_C(181), UINT8_C( 87), UINT8_C( 92), UINT8_C(207), + UINT8_C( 27), UINT8_C( 77), UINT8_C(200), UINT8_C(159), UINT8_C(243), UINT8_C( 22), UINT8_C(144), UINT8_C(240) } }, + }, + { { UINT8_C(247), UINT8_C(188), UINT8_C(129), UINT8_C(149), UINT8_C(135), UINT8_C(139), UINT8_C(105), UINT8_C( 55), + UINT8_C( 64), UINT8_C(191), UINT8_C(142), UINT8_C(153), UINT8_C( 27), UINT8_C(170), UINT8_C(104), UINT8_C( 71), + UINT8_C(197), UINT8_C( 24), UINT8_C(148), UINT8_C(143), UINT8_C(224), UINT8_C(126), UINT8_C( 47), UINT8_C(201), + UINT8_C(113), UINT8_C( 49), UINT8_C(223), UINT8_C( 79), UINT8_C(193), UINT8_C(135), UINT8_C( 64), UINT8_C(184) }, + { { UINT8_C(247), UINT8_C(129), UINT8_C(135), UINT8_C(105), UINT8_C( 64), UINT8_C(142), UINT8_C( 27), UINT8_C(104), + UINT8_C(197), UINT8_C(148), UINT8_C(224), UINT8_C( 47), UINT8_C(113), UINT8_C(223), UINT8_C(193), UINT8_C( 64) }, + { UINT8_C(188), 
UINT8_C(149), UINT8_C(139), UINT8_C( 55), UINT8_C(191), UINT8_C(153), UINT8_C(170), UINT8_C( 71), + UINT8_C( 24), UINT8_C(143), UINT8_C(126), UINT8_C(201), UINT8_C( 49), UINT8_C( 79), UINT8_C(135), UINT8_C(184) } }, + }, + { { UINT8_C( 67), UINT8_C(193), UINT8_C( 77), UINT8_C(202), UINT8_C( 77), UINT8_C(182), UINT8_C( 2), UINT8_C(141), + UINT8_C(117), UINT8_C(144), UINT8_C( 38), UINT8_C(144), UINT8_C( 58), UINT8_C(143), UINT8_C(215), UINT8_C( 0), + UINT8_C(167), UINT8_C(107), UINT8_C(143), UINT8_C(135), UINT8_C(233), UINT8_C(190), UINT8_C( 80), UINT8_C( 91), + UINT8_C(239), UINT8_C( 47), UINT8_C(170), UINT8_C(176), UINT8_C(182), UINT8_C(234), UINT8_C(104), UINT8_C(250) }, + { { UINT8_C( 67), UINT8_C( 77), UINT8_C( 77), UINT8_C( 2), UINT8_C(117), UINT8_C( 38), UINT8_C( 58), UINT8_C(215), + UINT8_C(167), UINT8_C(143), UINT8_C(233), UINT8_C( 80), UINT8_C(239), UINT8_C(170), UINT8_C(182), UINT8_C(104) }, + { UINT8_C(193), UINT8_C(202), UINT8_C(182), UINT8_C(141), UINT8_C(144), UINT8_C(144), UINT8_C(143), UINT8_C( 0), + UINT8_C(107), UINT8_C(135), UINT8_C(190), UINT8_C( 91), UINT8_C( 47), UINT8_C(176), UINT8_C(234), UINT8_C(250) } }, + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16x2_t r = simde_vld2q_u8(test_vec[i].a); + + simde_uint8x16x2_t expected = { + {simde_vld1q_u8(test_vec[i].r[0]), simde_vld1q_u8(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x16x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint8_t buf[32]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint8x16x2_t r = simde_vld2q_u8(buf); + 
+ simde_test_arm_neon_write_u8x16x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t a[16]; + uint16_t r[2][8]; + } test_vec[] = { + { { UINT16_C( 5263), UINT16_C(46704), UINT16_C(17324), UINT16_C( 435), UINT16_C(22826), UINT16_C(35226), UINT16_C( 4289), UINT16_C(14289), + UINT16_C(15842), UINT16_C(32624), UINT16_C( 9166), UINT16_C(50530), UINT16_C(15251), UINT16_C(37458), UINT16_C(64003), UINT16_C(37377) }, + { { UINT16_C( 5263), UINT16_C(17324), UINT16_C(22826), UINT16_C( 4289), UINT16_C(15842), UINT16_C( 9166), UINT16_C(15251), UINT16_C(64003) }, + { UINT16_C(46704), UINT16_C( 435), UINT16_C(35226), UINT16_C(14289), UINT16_C(32624), UINT16_C(50530), UINT16_C(37458), UINT16_C(37377) } }, + }, + { { UINT16_C(28943), UINT16_C(47944), UINT16_C(64436), UINT16_C(57020), UINT16_C(22100), UINT16_C( 5480), UINT16_C(14695), UINT16_C(18765), + UINT16_C(48502), UINT16_C(17865), UINT16_C(11232), UINT16_C(29450), UINT16_C(23654), UINT16_C(26886), UINT16_C( 1879), UINT16_C(26363) }, + { { UINT16_C(28943), UINT16_C(64436), UINT16_C(22100), UINT16_C(14695), UINT16_C(48502), UINT16_C(11232), UINT16_C(23654), UINT16_C( 1879) }, + { UINT16_C(47944), UINT16_C(57020), UINT16_C( 5480), UINT16_C(18765), UINT16_C(17865), UINT16_C(29450), UINT16_C(26886), UINT16_C(26363) } }, + }, + { { UINT16_C(17528), UINT16_C(11297), UINT16_C(56639), UINT16_C(37899), UINT16_C(29491), UINT16_C(39593), UINT16_C(63148), UINT16_C( 8932), + UINT16_C(44468), UINT16_C(37991), UINT16_C(29144), UINT16_C(16136), UINT16_C( 3790), UINT16_C( 9640), UINT16_C(42005), UINT16_C(36235) }, + { { UINT16_C(17528), UINT16_C(56639), UINT16_C(29491), UINT16_C(63148), UINT16_C(44468), UINT16_C(29144), UINT16_C( 3790), UINT16_C(42005) }, + { UINT16_C(11297), UINT16_C(37899), UINT16_C(39593), UINT16_C( 8932), UINT16_C(37991), UINT16_C(16136), UINT16_C( 9640), UINT16_C(36235) } }, + }, + { { UINT16_C(44264), 
UINT16_C(10169), UINT16_C(50313), UINT16_C(48315), UINT16_C(25911), UINT16_C(58199), UINT16_C(15195), UINT16_C( 3846), + UINT16_C(28136), UINT16_C(49316), UINT16_C(44255), UINT16_C(44543), UINT16_C(43194), UINT16_C(53202), UINT16_C(23884), UINT16_C(13404) }, + { { UINT16_C(44264), UINT16_C(50313), UINT16_C(25911), UINT16_C(15195), UINT16_C(28136), UINT16_C(44255), UINT16_C(43194), UINT16_C(23884) }, + { UINT16_C(10169), UINT16_C(48315), UINT16_C(58199), UINT16_C( 3846), UINT16_C(49316), UINT16_C(44543), UINT16_C(53202), UINT16_C(13404) } }, + }, + { { UINT16_C( 5385), UINT16_C(37467), UINT16_C( 6106), UINT16_C( 4430), UINT16_C(42364), UINT16_C(55285), UINT16_C(64480), UINT16_C(51431), + UINT16_C(35688), UINT16_C(18313), UINT16_C(34871), UINT16_C(61940), UINT16_C(50736), UINT16_C(31936), UINT16_C( 7203), UINT16_C(11440) }, + { { UINT16_C( 5385), UINT16_C( 6106), UINT16_C(42364), UINT16_C(64480), UINT16_C(35688), UINT16_C(34871), UINT16_C(50736), UINT16_C( 7203) }, + { UINT16_C(37467), UINT16_C( 4430), UINT16_C(55285), UINT16_C(51431), UINT16_C(18313), UINT16_C(61940), UINT16_C(31936), UINT16_C(11440) } }, + }, + { { UINT16_C( 3121), UINT16_C( 3006), UINT16_C( 3363), UINT16_C(40733), UINT16_C( 4786), UINT16_C(37750), UINT16_C(23821), UINT16_C(30043), + UINT16_C(58600), UINT16_C( 8125), UINT16_C(45421), UINT16_C(40208), UINT16_C(53368), UINT16_C(39706), UINT16_C(51948), UINT16_C( 7880) }, + { { UINT16_C( 3121), UINT16_C( 3363), UINT16_C( 4786), UINT16_C(23821), UINT16_C(58600), UINT16_C(45421), UINT16_C(53368), UINT16_C(51948) }, + { UINT16_C( 3006), UINT16_C(40733), UINT16_C(37750), UINT16_C(30043), UINT16_C( 8125), UINT16_C(40208), UINT16_C(39706), UINT16_C( 7880) } }, + }, + { { UINT16_C(34518), UINT16_C(63785), UINT16_C(18067), UINT16_C(18072), UINT16_C( 3928), UINT16_C(26073), UINT16_C(13420), UINT16_C(21979), + UINT16_C(38937), UINT16_C(34420), UINT16_C(34121), UINT16_C(49443), UINT16_C(15701), UINT16_C(16989), UINT16_C( 9480), UINT16_C(56928) }, + { { 
UINT16_C(34518), UINT16_C(18067), UINT16_C( 3928), UINT16_C(13420), UINT16_C(38937), UINT16_C(34121), UINT16_C(15701), UINT16_C( 9480) }, + { UINT16_C(63785), UINT16_C(18072), UINT16_C(26073), UINT16_C(21979), UINT16_C(34420), UINT16_C(49443), UINT16_C(16989), UINT16_C(56928) } }, + }, + { { UINT16_C(35243), UINT16_C(16344), UINT16_C(28880), UINT16_C(10373), UINT16_C(24191), UINT16_C(60558), UINT16_C(27026), UINT16_C(43841), + UINT16_C(46337), UINT16_C(18993), UINT16_C(21818), UINT16_C(36876), UINT16_C(27026), UINT16_C(39634), UINT16_C(12942), UINT16_C(14713) }, + { { UINT16_C(35243), UINT16_C(28880), UINT16_C(24191), UINT16_C(27026), UINT16_C(46337), UINT16_C(21818), UINT16_C(27026), UINT16_C(12942) }, + { UINT16_C(16344), UINT16_C(10373), UINT16_C(60558), UINT16_C(43841), UINT16_C(18993), UINT16_C(36876), UINT16_C(39634), UINT16_C(14713) } }, + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8x2_t r = simde_vld2q_u16(test_vec[i].a); + + simde_uint16x8x2_t expected = { + {simde_vld1q_u16(test_vec[i].r[0]), simde_vld1q_u16(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x8x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint16_t buf[16]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint16x8x2_t r = simde_vld2q_u16(buf); + + simde_test_arm_neon_write_u16x8x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t a[8]; + uint32_t r[2][4]; + } test_vec[] = { + { { UINT32_C(2339918267), 
UINT32_C(1102380481), UINT32_C(3995943515), UINT32_C(2895736491), + UINT32_C(1593232163), UINT32_C(3001942560), UINT32_C(4182622315), UINT32_C(2905851634) }, + { { UINT32_C(2339918267), UINT32_C(3995943515), UINT32_C(1593232163), UINT32_C(4182622315) }, + { UINT32_C(1102380481), UINT32_C(2895736491), UINT32_C(3001942560), UINT32_C(2905851634) } }, + }, + { { UINT32_C(3627658007), UINT32_C( 68808105), UINT32_C(3673310767), UINT32_C(3632696500), + UINT32_C(2000059479), UINT32_C(3928564863), UINT32_C(3605296868), UINT32_C(1401100092) }, + { { UINT32_C(3627658007), UINT32_C(3673310767), UINT32_C(2000059479), UINT32_C(3605296868) }, + { UINT32_C( 68808105), UINT32_C(3632696500), UINT32_C(3928564863), UINT32_C(1401100092) } }, + }, + { { UINT32_C(1798094018), UINT32_C(3631236521), UINT32_C(1085432460), UINT32_C(1159215342), + UINT32_C( 884756149), UINT32_C(1444865650), UINT32_C(2569798236), UINT32_C(3706499097) }, + { { UINT32_C(1798094018), UINT32_C(1085432460), UINT32_C( 884756149), UINT32_C(2569798236) }, + { UINT32_C(3631236521), UINT32_C(1159215342), UINT32_C(1444865650), UINT32_C(3706499097) } }, + }, + { { UINT32_C( 373758060), UINT32_C(3941513054), UINT32_C( 137011482), UINT32_C(2387493849), + UINT32_C( 79825553), UINT32_C(1297801712), UINT32_C(4259743715), UINT32_C(2748961335) }, + { { UINT32_C( 373758060), UINT32_C( 137011482), UINT32_C( 79825553), UINT32_C(4259743715) }, + { UINT32_C(3941513054), UINT32_C(2387493849), UINT32_C(1297801712), UINT32_C(2748961335) } }, + }, + { { UINT32_C(1236869355), UINT32_C(4063471832), UINT32_C( 586833225), UINT32_C( 850479264), + UINT32_C(1127641939), UINT32_C( 948998228), UINT32_C(1312126487), UINT32_C( 888278601) }, + { { UINT32_C(1236869355), UINT32_C( 586833225), UINT32_C(1127641939), UINT32_C(1312126487) }, + { UINT32_C(4063471832), UINT32_C( 850479264), UINT32_C( 948998228), UINT32_C( 888278601) } }, + }, + { { UINT32_C( 108899118), UINT32_C(2633543763), UINT32_C(2931815181), UINT32_C(2397073467), + UINT32_C( 
953292515), UINT32_C(3195036326), UINT32_C( 571254233), UINT32_C(3780574899) }, + { { UINT32_C( 108899118), UINT32_C(2931815181), UINT32_C( 953292515), UINT32_C( 571254233) }, + { UINT32_C(2633543763), UINT32_C(2397073467), UINT32_C(3195036326), UINT32_C(3780574899) } }, + }, + { { UINT32_C(4259894186), UINT32_C(2426069123), UINT32_C( 255744467), UINT32_C(2895978185), + UINT32_C(3689180980), UINT32_C(2878952658), UINT32_C(2899158521), UINT32_C(1317938084) }, + { { UINT32_C(4259894186), UINT32_C( 255744467), UINT32_C(3689180980), UINT32_C(2899158521) }, + { UINT32_C(2426069123), UINT32_C(2895978185), UINT32_C(2878952658), UINT32_C(1317938084) } }, + }, + { { UINT32_C(2034988790), UINT32_C( 705291606), UINT32_C( 121194558), UINT32_C(2612319846), + UINT32_C( 410425414), UINT32_C(3871543277), UINT32_C(1486065844), UINT32_C(2846237107) }, + { { UINT32_C(2034988790), UINT32_C( 121194558), UINT32_C( 410425414), UINT32_C(1486065844) }, + { UINT32_C( 705291606), UINT32_C(2612319846), UINT32_C(3871543277), UINT32_C(2846237107) } }, + }, + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4x2_t r = simde_vld2q_u32(test_vec[i].a); + + simde_uint32x4x2_t expected = { + {simde_vld1q_u32(test_vec[i].r[0]), simde_vld1q_u32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x4x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint32_t buf[8]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint32x4x2_t r = simde_vld2q_u32(buf); + + simde_test_arm_neon_write_u32x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + 
+static int +test_simde_vld2q_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint64_t a[4]; + uint64_t r[2][2]; + } test_vec[] = { + { { UINT64_C( 3020762151839533812), UINT64_C(17954495856467081562), + UINT64_C(14288482954718013222), UINT64_C( 3819020876812341264) }, + { { UINT64_C( 3020762151839533812), UINT64_C(14288482954718013222) }, + { UINT64_C(17954495856467081562), UINT64_C( 3819020876812341264) } }, + }, + { { UINT64_C(10899347977887241965), UINT64_C( 9840223772233446588), + UINT64_C(12950102532167286886), UINT64_C(13530663546384542545) }, + { { UINT64_C(10899347977887241965), UINT64_C(12950102532167286886) }, + { UINT64_C( 9840223772233446588), UINT64_C(13530663546384542545) } }, + }, + { { UINT64_C( 2070555630402543080), UINT64_C(17741159496252854347), + UINT64_C( 4492799045846756354), UINT64_C( 7500346603649101196) }, + { { UINT64_C( 2070555630402543080), UINT64_C( 4492799045846756354) }, + { UINT64_C(17741159496252854347), UINT64_C( 7500346603649101196) } }, + }, + { { UINT64_C( 5489969314248125107), UINT64_C( 2945686371667927898), + UINT64_C(17745633243074316570), UINT64_C( 6772400822477133076) }, + { { UINT64_C( 5489969314248125107), UINT64_C(17745633243074316570) }, + { UINT64_C( 2945686371667927898), UINT64_C( 6772400822477133076) } }, + }, + { { UINT64_C( 3359581776035023185), UINT64_C(16272061564597082244), + UINT64_C(15790516151494746051), UINT64_C( 3119705754931524419) }, + { { UINT64_C( 3359581776035023185), UINT64_C(15790516151494746051) }, + { UINT64_C(16272061564597082244), UINT64_C( 3119705754931524419) } }, + }, + { { UINT64_C(16282232691925826805), UINT64_C(14653969954052444085), + UINT64_C(13303100541911975676), UINT64_C( 7968260244002705039) }, + { { UINT64_C(16282232691925826805), UINT64_C(13303100541911975676) }, + { UINT64_C(14653969954052444085), UINT64_C( 7968260244002705039) } }, + }, + { { UINT64_C(16423833091886748078), UINT64_C(15064748377732249660), + UINT64_C(12911875263894391043), UINT64_C( 
8586307070039217984) }, + { { UINT64_C(16423833091886748078), UINT64_C(12911875263894391043) }, + { UINT64_C(15064748377732249660), UINT64_C( 8586307070039217984) } }, + }, + { { UINT64_C(14543092476148932546), UINT64_C(17406166195061535465), + UINT64_C(10808808801129014056), UINT64_C(10354190940655066225) }, + { { UINT64_C(14543092476148932546), UINT64_C(10808808801129014056) }, + { UINT64_C(17406166195061535465), UINT64_C(10354190940655066225) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2x2_t r = simde_vld2q_u64(test_vec[i].a); + + simde_uint64x2x2_t expected = { + {simde_vld1q_u64(test_vec[i].r[0]), simde_vld1q_u64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint64x2x2_t c = {{a, b}}; + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + uint64_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_uint64x2x2_t r = simde_vld2q_u64(buf); + + simde_test_arm_neon_write_u64x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t a[8]; + simde_float32_t r[2][4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 793.71), SIMDE_FLOAT32_C( 221.62), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( -525.52), + SIMDE_FLOAT32_C( 800.99), SIMDE_FLOAT32_C( -411.87), SIMDE_FLOAT32_C( 483.99), SIMDE_FLOAT32_C( 852.73) }, + { { SIMDE_FLOAT32_C( 793.71), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( 800.99), SIMDE_FLOAT32_C( 483.99) }, + { SIMDE_FLOAT32_C( 221.62), SIMDE_FLOAT32_C( -525.52), SIMDE_FLOAT32_C( -411.87), 
SIMDE_FLOAT32_C( 852.73) } }, + }, + { { SIMDE_FLOAT32_C( -433.25), SIMDE_FLOAT32_C( -811.76), SIMDE_FLOAT32_C( -463.04), SIMDE_FLOAT32_C( 671.94), + SIMDE_FLOAT32_C( -60.94), SIMDE_FLOAT32_C( -434.20), SIMDE_FLOAT32_C( -263.89), SIMDE_FLOAT32_C( 754.63) }, + { { SIMDE_FLOAT32_C( -433.25), SIMDE_FLOAT32_C( -463.04), SIMDE_FLOAT32_C( -60.94), SIMDE_FLOAT32_C( -263.89) }, + { SIMDE_FLOAT32_C( -811.76), SIMDE_FLOAT32_C( 671.94), SIMDE_FLOAT32_C( -434.20), SIMDE_FLOAT32_C( 754.63) } }, + }, + { { SIMDE_FLOAT32_C( -877.85), SIMDE_FLOAT32_C( -225.98), SIMDE_FLOAT32_C( -292.03), SIMDE_FLOAT32_C( 932.32), + SIMDE_FLOAT32_C( -463.10), SIMDE_FLOAT32_C( 171.18), SIMDE_FLOAT32_C( -115.23), SIMDE_FLOAT32_C( -867.52) }, + { { SIMDE_FLOAT32_C( -877.85), SIMDE_FLOAT32_C( -292.03), SIMDE_FLOAT32_C( -463.10), SIMDE_FLOAT32_C( -115.23) }, + { SIMDE_FLOAT32_C( -225.98), SIMDE_FLOAT32_C( 932.32), SIMDE_FLOAT32_C( 171.18), SIMDE_FLOAT32_C( -867.52) } }, + }, + { { SIMDE_FLOAT32_C( -182.14), SIMDE_FLOAT32_C( 874.57), SIMDE_FLOAT32_C( -306.44), SIMDE_FLOAT32_C( 180.87), + SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -449.53), SIMDE_FLOAT32_C( -593.39), SIMDE_FLOAT32_C( 966.11) }, + { { SIMDE_FLOAT32_C( -182.14), SIMDE_FLOAT32_C( -306.44), SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -593.39) }, + { SIMDE_FLOAT32_C( 874.57), SIMDE_FLOAT32_C( 180.87), SIMDE_FLOAT32_C( -449.53), SIMDE_FLOAT32_C( 966.11) } }, + }, + { { SIMDE_FLOAT32_C( 772.09), SIMDE_FLOAT32_C( -811.82), SIMDE_FLOAT32_C( -559.40), SIMDE_FLOAT32_C( 573.08), + SIMDE_FLOAT32_C( -223.69), SIMDE_FLOAT32_C( 924.59), SIMDE_FLOAT32_C( 425.81), SIMDE_FLOAT32_C( 343.06) }, + { { SIMDE_FLOAT32_C( 772.09), SIMDE_FLOAT32_C( -559.40), SIMDE_FLOAT32_C( -223.69), SIMDE_FLOAT32_C( 425.81) }, + { SIMDE_FLOAT32_C( -811.82), SIMDE_FLOAT32_C( 573.08), SIMDE_FLOAT32_C( 924.59), SIMDE_FLOAT32_C( 343.06) } }, + }, + { { SIMDE_FLOAT32_C( -887.17), SIMDE_FLOAT32_C( 962.76), SIMDE_FLOAT32_C( 15.01), SIMDE_FLOAT32_C( 51.89), + SIMDE_FLOAT32_C( -471.44), 
SIMDE_FLOAT32_C( 751.11), SIMDE_FLOAT32_C( -193.48), SIMDE_FLOAT32_C( -349.29) }, + { { SIMDE_FLOAT32_C( -887.17), SIMDE_FLOAT32_C( 15.01), SIMDE_FLOAT32_C( -471.44), SIMDE_FLOAT32_C( -193.48) }, + { SIMDE_FLOAT32_C( 962.76), SIMDE_FLOAT32_C( 51.89), SIMDE_FLOAT32_C( 751.11), SIMDE_FLOAT32_C( -349.29) } }, + }, + { { SIMDE_FLOAT32_C( -474.87), SIMDE_FLOAT32_C( 514.49), SIMDE_FLOAT32_C( -416.97), SIMDE_FLOAT32_C( 62.03), + SIMDE_FLOAT32_C( -314.33), SIMDE_FLOAT32_C( 467.80), SIMDE_FLOAT32_C( 194.51), SIMDE_FLOAT32_C( 503.53) }, + { { SIMDE_FLOAT32_C( -474.87), SIMDE_FLOAT32_C( -416.97), SIMDE_FLOAT32_C( -314.33), SIMDE_FLOAT32_C( 194.51) }, + { SIMDE_FLOAT32_C( 514.49), SIMDE_FLOAT32_C( 62.03), SIMDE_FLOAT32_C( 467.80), SIMDE_FLOAT32_C( 503.53) } }, + }, + { { SIMDE_FLOAT32_C( 342.37), SIMDE_FLOAT32_C( 888.07), SIMDE_FLOAT32_C( -315.60), SIMDE_FLOAT32_C( 514.77), + SIMDE_FLOAT32_C( -561.46), SIMDE_FLOAT32_C( 91.01), SIMDE_FLOAT32_C( 480.89), SIMDE_FLOAT32_C( -789.37) }, + { { SIMDE_FLOAT32_C( 342.37), SIMDE_FLOAT32_C( -315.60), SIMDE_FLOAT32_C( -561.46), SIMDE_FLOAT32_C( 480.89) }, + { SIMDE_FLOAT32_C( 888.07), SIMDE_FLOAT32_C( 514.77), SIMDE_FLOAT32_C( 91.01), SIMDE_FLOAT32_C( -789.37) } }, + }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4x2_t r = simde_vld2q_f32(test_vec[i].a); + + simde_float32x4x2_t expected = { + {simde_vld1q_f32(test_vec[i].r[0]), simde_vld1q_f32(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4x2_t c = {{a, b}}; + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); + + simde_float32_t buf[8]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_float32x4x2_t r = simde_vld2q_f32(buf); + + simde_test_arm_neon_write_f32x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld2q_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64_t a[4]; + simde_float64_t r[2][2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -81.12), SIMDE_FLOAT64_C( -90.46), + SIMDE_FLOAT64_C( -83.90), SIMDE_FLOAT64_C( 20.75) }, + { { SIMDE_FLOAT64_C( -81.12), SIMDE_FLOAT64_C( -83.90) }, + { SIMDE_FLOAT64_C( -90.46), SIMDE_FLOAT64_C( 20.75) } }, + }, + { { SIMDE_FLOAT64_C( -91.92), SIMDE_FLOAT64_C( 5.15), + SIMDE_FLOAT64_C( -58.53), SIMDE_FLOAT64_C( -40.61) }, + { { SIMDE_FLOAT64_C( -91.92), SIMDE_FLOAT64_C( -58.53) }, + { SIMDE_FLOAT64_C( 5.15), SIMDE_FLOAT64_C( -40.61) } }, + }, + { { SIMDE_FLOAT64_C( 92.56), SIMDE_FLOAT64_C( 91.44), + SIMDE_FLOAT64_C( 67.84), SIMDE_FLOAT64_C( -58.14) }, + { { SIMDE_FLOAT64_C( 92.56), SIMDE_FLOAT64_C( 67.84) }, + { SIMDE_FLOAT64_C( 91.44), SIMDE_FLOAT64_C( -58.14) } }, + }, + { { SIMDE_FLOAT64_C( 63.47), SIMDE_FLOAT64_C( 42.43), + SIMDE_FLOAT64_C( 29.58), SIMDE_FLOAT64_C( 4.03) }, + { { SIMDE_FLOAT64_C( 63.47), SIMDE_FLOAT64_C( 29.58) }, + { SIMDE_FLOAT64_C( 42.43), SIMDE_FLOAT64_C( 4.03) } }, + }, + { { SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -69.19), + SIMDE_FLOAT64_C( -73.56), SIMDE_FLOAT64_C( -97.91) }, + { { SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -73.56) }, + { SIMDE_FLOAT64_C( -69.19), SIMDE_FLOAT64_C( -97.91) } }, + }, + { { SIMDE_FLOAT64_C( -62.67), SIMDE_FLOAT64_C( -66.03), + SIMDE_FLOAT64_C( -17.85), SIMDE_FLOAT64_C( -62.88) }, + { { SIMDE_FLOAT64_C( -62.67), SIMDE_FLOAT64_C( -17.85) }, + { SIMDE_FLOAT64_C( -66.03), SIMDE_FLOAT64_C( -62.88) } }, + }, + { { SIMDE_FLOAT64_C( -24.49), SIMDE_FLOAT64_C( -93.09), + SIMDE_FLOAT64_C( 12.23), SIMDE_FLOAT64_C( 80.71) }, + { { SIMDE_FLOAT64_C( -24.49), SIMDE_FLOAT64_C( 12.23) }, + { 
SIMDE_FLOAT64_C( -93.09), SIMDE_FLOAT64_C( 80.71) } }, + }, + { { SIMDE_FLOAT64_C( -90.54), SIMDE_FLOAT64_C( 20.34), + SIMDE_FLOAT64_C( -11.66), SIMDE_FLOAT64_C( -71.66) }, + { { SIMDE_FLOAT64_C( -90.54), SIMDE_FLOAT64_C( -11.66) }, + { SIMDE_FLOAT64_C( 20.34), SIMDE_FLOAT64_C( -71.66) } }, + } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2x2_t r = simde_vld2q_f64(test_vec[i].a); + + simde_float64x2x2_t expected = { + {simde_vld1q_f64(test_vec[i].r[0]), simde_vld1q_f64(test_vec[i].r[1])}}; + + simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], 1); + simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], 1); + } + + return 0; +#else + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-100.0, 100.0); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-100.0, 100.0); + simde_float64x2x2_t c = {{a, b}}; + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + + simde_float64_t buf[4]; + simde_memcpy(buf, c.val, sizeof(buf)); + simde_float64x2x2_t r = simde_vld2q_f64(buf); + + simde_test_arm_neon_write_f64x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +*/ +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_BEGIN +#if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_f16) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_s16) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_s32) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_s64) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u8) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u16) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u32) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u64) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f32) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f64) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u8) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s8) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s16) 
+//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s32) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s64) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u16) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u32) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u64) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_f32) +//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_f64) +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld3_dup.c b/test/arm/neon/ld3_dup.c new file mode 100644 index 000000000..359bcf050 --- /dev/null +++ b/test/arm/neon/ld3_dup.c @@ -0,0 +1,1569 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld3_dup + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld3_dup.h" + +#if !defined(SIMDE_BUG_INTEL_857088) + +static int +test_simde_vld3_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[3]; + simde_float16_t unused[3]; + simde_float16_t r[3][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-6.58), SIMDE_FLOAT16_VALUE(2.17), SIMDE_FLOAT16_VALUE(3.51) }, + { SIMDE_FLOAT16_VALUE(-28.23), SIMDE_FLOAT16_VALUE(27.18), SIMDE_FLOAT16_VALUE(21.06)}, + { { SIMDE_FLOAT16_VALUE(-6.58), SIMDE_FLOAT16_VALUE(-6.58), SIMDE_FLOAT16_VALUE(-6.58), SIMDE_FLOAT16_VALUE(-6.58) }, + { SIMDE_FLOAT16_VALUE(2.17), SIMDE_FLOAT16_VALUE(2.17), SIMDE_FLOAT16_VALUE(2.17), SIMDE_FLOAT16_VALUE(2.17) }, + { SIMDE_FLOAT16_VALUE(3.51), SIMDE_FLOAT16_VALUE(3.51), SIMDE_FLOAT16_VALUE(3.51), SIMDE_FLOAT16_VALUE(3.51) } } }, + { { SIMDE_FLOAT16_VALUE(-46.99), SIMDE_FLOAT16_VALUE(-6.72), SIMDE_FLOAT16_VALUE(41.08) }, + { SIMDE_FLOAT16_VALUE(-2.06), SIMDE_FLOAT16_VALUE(-6.78), SIMDE_FLOAT16_VALUE(36.58)}, + { { SIMDE_FLOAT16_VALUE(-46.99), SIMDE_FLOAT16_VALUE(-46.99), SIMDE_FLOAT16_VALUE(-46.99), SIMDE_FLOAT16_VALUE(-46.99) }, + { SIMDE_FLOAT16_VALUE(-6.72), SIMDE_FLOAT16_VALUE(-6.72), SIMDE_FLOAT16_VALUE(-6.72), SIMDE_FLOAT16_VALUE(-6.72) }, + { SIMDE_FLOAT16_VALUE(41.08), SIMDE_FLOAT16_VALUE(41.08), SIMDE_FLOAT16_VALUE(41.08), SIMDE_FLOAT16_VALUE(41.08) } } }, + { { 
SIMDE_FLOAT16_VALUE(39.86), SIMDE_FLOAT16_VALUE(40.77), SIMDE_FLOAT16_VALUE(-32.22) }, + { SIMDE_FLOAT16_VALUE(-24.94), SIMDE_FLOAT16_VALUE(-38.80), SIMDE_FLOAT16_VALUE(7.87)}, + { { SIMDE_FLOAT16_VALUE(39.86), SIMDE_FLOAT16_VALUE(39.86), SIMDE_FLOAT16_VALUE(39.86), SIMDE_FLOAT16_VALUE(39.86) }, + { SIMDE_FLOAT16_VALUE(40.77), SIMDE_FLOAT16_VALUE(40.77), SIMDE_FLOAT16_VALUE(40.77), SIMDE_FLOAT16_VALUE(40.77) }, + { SIMDE_FLOAT16_VALUE(-32.22), SIMDE_FLOAT16_VALUE(-32.22), SIMDE_FLOAT16_VALUE(-32.22), SIMDE_FLOAT16_VALUE(-32.22) } } }, + { { SIMDE_FLOAT16_VALUE(3.72), SIMDE_FLOAT16_VALUE(45.41), SIMDE_FLOAT16_VALUE(4.48) }, + { SIMDE_FLOAT16_VALUE(47.09), SIMDE_FLOAT16_VALUE(-24.14), SIMDE_FLOAT16_VALUE(-0.13)}, + { { SIMDE_FLOAT16_VALUE(3.72), SIMDE_FLOAT16_VALUE(3.72), SIMDE_FLOAT16_VALUE(3.72), SIMDE_FLOAT16_VALUE(3.72) }, + { SIMDE_FLOAT16_VALUE(45.41), SIMDE_FLOAT16_VALUE(45.41), SIMDE_FLOAT16_VALUE(45.41), SIMDE_FLOAT16_VALUE(45.41) }, + { SIMDE_FLOAT16_VALUE(4.48), SIMDE_FLOAT16_VALUE(4.48), SIMDE_FLOAT16_VALUE(4.48), SIMDE_FLOAT16_VALUE(4.48) } } }, + { { SIMDE_FLOAT16_VALUE(-31.75), SIMDE_FLOAT16_VALUE(20.34), SIMDE_FLOAT16_VALUE(-29.59) }, + { SIMDE_FLOAT16_VALUE(-11.56), SIMDE_FLOAT16_VALUE(-7.53), SIMDE_FLOAT16_VALUE(28.61)}, + { { SIMDE_FLOAT16_VALUE(-31.75), SIMDE_FLOAT16_VALUE(-31.75), SIMDE_FLOAT16_VALUE(-31.75), SIMDE_FLOAT16_VALUE(-31.75) }, + { SIMDE_FLOAT16_VALUE(20.34), SIMDE_FLOAT16_VALUE(20.34), SIMDE_FLOAT16_VALUE(20.34), SIMDE_FLOAT16_VALUE(20.34) }, + { SIMDE_FLOAT16_VALUE(-29.59), SIMDE_FLOAT16_VALUE(-29.59), SIMDE_FLOAT16_VALUE(-29.59), SIMDE_FLOAT16_VALUE(-29.59) } } }, + { { SIMDE_FLOAT16_VALUE(37.05), SIMDE_FLOAT16_VALUE(17.72), SIMDE_FLOAT16_VALUE(-49.37) }, + { SIMDE_FLOAT16_VALUE(-27.61), SIMDE_FLOAT16_VALUE(-29.16), SIMDE_FLOAT16_VALUE(2.30)}, + { { SIMDE_FLOAT16_VALUE(37.05), SIMDE_FLOAT16_VALUE(37.05), SIMDE_FLOAT16_VALUE(37.05), SIMDE_FLOAT16_VALUE(37.05) }, + { SIMDE_FLOAT16_VALUE(17.72), SIMDE_FLOAT16_VALUE(17.72), 
SIMDE_FLOAT16_VALUE(17.72), SIMDE_FLOAT16_VALUE(17.72) }, + { SIMDE_FLOAT16_VALUE(-49.37), SIMDE_FLOAT16_VALUE(-49.37), SIMDE_FLOAT16_VALUE(-49.37), SIMDE_FLOAT16_VALUE(-49.37) } } }, + { { SIMDE_FLOAT16_VALUE(-49.24), SIMDE_FLOAT16_VALUE(-2.48), SIMDE_FLOAT16_VALUE(-45.21) }, + { SIMDE_FLOAT16_VALUE(19.81), SIMDE_FLOAT16_VALUE(5.79), SIMDE_FLOAT16_VALUE(42.08)}, + { { SIMDE_FLOAT16_VALUE(-49.24), SIMDE_FLOAT16_VALUE(-49.24), SIMDE_FLOAT16_VALUE(-49.24), SIMDE_FLOAT16_VALUE(-49.24) }, + { SIMDE_FLOAT16_VALUE(-2.48), SIMDE_FLOAT16_VALUE(-2.48), SIMDE_FLOAT16_VALUE(-2.48), SIMDE_FLOAT16_VALUE(-2.48) }, + { SIMDE_FLOAT16_VALUE(-45.21), SIMDE_FLOAT16_VALUE(-45.21), SIMDE_FLOAT16_VALUE(-45.21), SIMDE_FLOAT16_VALUE(-45.21) } } }, + { { SIMDE_FLOAT16_VALUE(26.41), SIMDE_FLOAT16_VALUE(49.96), SIMDE_FLOAT16_VALUE(20.11) }, + { SIMDE_FLOAT16_VALUE(34.64), SIMDE_FLOAT16_VALUE(-7.87), SIMDE_FLOAT16_VALUE(-43.78)}, + { { SIMDE_FLOAT16_VALUE(26.41), SIMDE_FLOAT16_VALUE(26.41), SIMDE_FLOAT16_VALUE(26.41), SIMDE_FLOAT16_VALUE(26.41) }, + { SIMDE_FLOAT16_VALUE(49.96), SIMDE_FLOAT16_VALUE(49.96), SIMDE_FLOAT16_VALUE(49.96), SIMDE_FLOAT16_VALUE(49.96) }, + { SIMDE_FLOAT16_VALUE(20.11), SIMDE_FLOAT16_VALUE(20.11), SIMDE_FLOAT16_VALUE(20.11), SIMDE_FLOAT16_VALUE(20.11) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x3_t r = simde_vld3_dup_f16(test_vec[i].a); + simde_float16x4x3_t expected = { + {simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1]), simde_vld1_f16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3_dup_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[3]; + float unused[3]; + float r[3][2]; + } test_vec[] = { + { { 
SIMDE_FLOAT32_C(-2382.50), SIMDE_FLOAT32_C(1337.52), SIMDE_FLOAT32_C(-2555.52) }, + { SIMDE_FLOAT32_C(2329.55), SIMDE_FLOAT32_C(-2887.02), SIMDE_FLOAT32_C(-1085.66)}, + { { SIMDE_FLOAT32_C(-2382.50), SIMDE_FLOAT32_C(-2382.50) }, + { SIMDE_FLOAT32_C(1337.52), SIMDE_FLOAT32_C(1337.52) }, + { SIMDE_FLOAT32_C(-2555.52), SIMDE_FLOAT32_C(-2555.52) } } }, + { { SIMDE_FLOAT32_C(-4735.63), SIMDE_FLOAT32_C(-1105.19), SIMDE_FLOAT32_C(2348.59) }, + { SIMDE_FLOAT32_C(1863.06), SIMDE_FLOAT32_C(-2989.05), SIMDE_FLOAT32_C(3957.38)}, + { { SIMDE_FLOAT32_C(-4735.63), SIMDE_FLOAT32_C(-4735.63) }, + { SIMDE_FLOAT32_C(-1105.19), SIMDE_FLOAT32_C(-1105.19) }, + { SIMDE_FLOAT32_C(2348.59), SIMDE_FLOAT32_C(2348.59) } } }, + { { SIMDE_FLOAT32_C(-2290.48), SIMDE_FLOAT32_C(1772.72), SIMDE_FLOAT32_C(4473.71) }, + { SIMDE_FLOAT32_C(-2560.60), SIMDE_FLOAT32_C(-3282.98), SIMDE_FLOAT32_C(-4654.17)}, + { { SIMDE_FLOAT32_C(-2290.48), SIMDE_FLOAT32_C(-2290.48) }, + { SIMDE_FLOAT32_C(1772.72), SIMDE_FLOAT32_C(1772.72) }, + { SIMDE_FLOAT32_C(4473.71), SIMDE_FLOAT32_C(4473.71) } } }, + { { SIMDE_FLOAT32_C(2639.57), SIMDE_FLOAT32_C(-2227.82), SIMDE_FLOAT32_C(4717.55) }, + { SIMDE_FLOAT32_C(4303.36), SIMDE_FLOAT32_C(-1218.20), SIMDE_FLOAT32_C(3830.48)}, + { { SIMDE_FLOAT32_C(2639.57), SIMDE_FLOAT32_C(2639.57) }, + { SIMDE_FLOAT32_C(-2227.82), SIMDE_FLOAT32_C(-2227.82) }, + { SIMDE_FLOAT32_C(4717.55), SIMDE_FLOAT32_C(4717.55) } } }, + { { SIMDE_FLOAT32_C(2452.62), SIMDE_FLOAT32_C(1625.56), SIMDE_FLOAT32_C(-228.25) }, + { SIMDE_FLOAT32_C(-1171.56), SIMDE_FLOAT32_C(1910.83), SIMDE_FLOAT32_C(3954.71)}, + { { SIMDE_FLOAT32_C(2452.62), SIMDE_FLOAT32_C(2452.62) }, + { SIMDE_FLOAT32_C(1625.56), SIMDE_FLOAT32_C(1625.56) }, + { SIMDE_FLOAT32_C(-228.25), SIMDE_FLOAT32_C(-228.25) } } }, + { { SIMDE_FLOAT32_C(-2742.46), SIMDE_FLOAT32_C(3293.38), SIMDE_FLOAT32_C(346.01) }, + { SIMDE_FLOAT32_C(4929.24), SIMDE_FLOAT32_C(2432.88), SIMDE_FLOAT32_C(1629.40)}, + { { SIMDE_FLOAT32_C(-2742.46), SIMDE_FLOAT32_C(-2742.46) }, + { 
SIMDE_FLOAT32_C(3293.38), SIMDE_FLOAT32_C(3293.38) }, + { SIMDE_FLOAT32_C(346.01), SIMDE_FLOAT32_C(346.01) } } }, + { { SIMDE_FLOAT32_C(4711.75), SIMDE_FLOAT32_C(4806.28), SIMDE_FLOAT32_C(941.70) }, + { SIMDE_FLOAT32_C(572.76), SIMDE_FLOAT32_C(2872.54), SIMDE_FLOAT32_C(2021.90)}, + { { SIMDE_FLOAT32_C(4711.75), SIMDE_FLOAT32_C(4711.75) }, + { SIMDE_FLOAT32_C(4806.28), SIMDE_FLOAT32_C(4806.28) }, + { SIMDE_FLOAT32_C(941.70), SIMDE_FLOAT32_C(941.70) } } }, + { { SIMDE_FLOAT32_C(4312.83), SIMDE_FLOAT32_C(-1093.57), SIMDE_FLOAT32_C(-4143.46) }, + { SIMDE_FLOAT32_C(-73.76), SIMDE_FLOAT32_C(4246.17), SIMDE_FLOAT32_C(-233.49)}, + { { SIMDE_FLOAT32_C(4312.83), SIMDE_FLOAT32_C(4312.83) }, + { SIMDE_FLOAT32_C(-1093.57), SIMDE_FLOAT32_C(-1093.57) }, + { SIMDE_FLOAT32_C(-4143.46), SIMDE_FLOAT32_C(-4143.46) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2x3_t r = simde_vld3_dup_f32(test_vec[i].a); + simde_float32x2x3_t expected = { + {simde_vld1_f32(test_vec[i].r[0]), simde_vld1_f32(test_vec[i].r[1]), simde_vld1_f32(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3_dup_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[3]; + simde_float64 unused[3]; + simde_float64 r[3][1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(236226.67), SIMDE_FLOAT64_C(192939.74), SIMDE_FLOAT64_C(384504.37) }, + { SIMDE_FLOAT64_C(-378339.05), SIMDE_FLOAT64_C(76696.47), SIMDE_FLOAT64_C(438417.90)}, + { { SIMDE_FLOAT64_C(236226.67) }, + { SIMDE_FLOAT64_C(192939.74) }, + { SIMDE_FLOAT64_C(384504.37) } } }, + { { SIMDE_FLOAT64_C(144897.63), SIMDE_FLOAT64_C(483753.45), SIMDE_FLOAT64_C(-377140.44) }, + { SIMDE_FLOAT64_C(-21825.93), SIMDE_FLOAT64_C(429765.52), 
SIMDE_FLOAT64_C(43437.54)}, + { { SIMDE_FLOAT64_C(144897.63) }, + { SIMDE_FLOAT64_C(483753.45) }, + { SIMDE_FLOAT64_C(-377140.44) } } }, + { { SIMDE_FLOAT64_C(394483.35), SIMDE_FLOAT64_C(-488967.56), SIMDE_FLOAT64_C(-59025.38) }, + { SIMDE_FLOAT64_C(-493532.19), SIMDE_FLOAT64_C(-466855.88), SIMDE_FLOAT64_C(-38442.08)}, + { { SIMDE_FLOAT64_C(394483.35) }, + { SIMDE_FLOAT64_C(-488967.56) }, + { SIMDE_FLOAT64_C(-59025.38) } } }, + { { SIMDE_FLOAT64_C(354858.43), SIMDE_FLOAT64_C(-400360.87), SIMDE_FLOAT64_C(375674.67) }, + { SIMDE_FLOAT64_C(-20167.12), SIMDE_FLOAT64_C(-172548.28), SIMDE_FLOAT64_C(-108982.35)}, + { { SIMDE_FLOAT64_C(354858.43) }, + { SIMDE_FLOAT64_C(-400360.87) }, + { SIMDE_FLOAT64_C(375674.67) } } }, + { { SIMDE_FLOAT64_C(379531.80), SIMDE_FLOAT64_C(-427312.47), SIMDE_FLOAT64_C(-243573.72) }, + { SIMDE_FLOAT64_C(-414651.89), SIMDE_FLOAT64_C(174702.00), SIMDE_FLOAT64_C(56533.13)}, + { { SIMDE_FLOAT64_C(379531.80) }, + { SIMDE_FLOAT64_C(-427312.47) }, + { SIMDE_FLOAT64_C(-243573.72) } } }, + { { SIMDE_FLOAT64_C(8488.31), SIMDE_FLOAT64_C(-324836.80), SIMDE_FLOAT64_C(-438953.32) }, + { SIMDE_FLOAT64_C(-109126.89), SIMDE_FLOAT64_C(-388098.72), SIMDE_FLOAT64_C(-93955.75)}, + { { SIMDE_FLOAT64_C(8488.31) }, + { SIMDE_FLOAT64_C(-324836.80) }, + { SIMDE_FLOAT64_C(-438953.32) } } }, + { { SIMDE_FLOAT64_C(-446810.13), SIMDE_FLOAT64_C(-275424.67), SIMDE_FLOAT64_C(453798.55) }, + { SIMDE_FLOAT64_C(493858.73), SIMDE_FLOAT64_C(73175.04), SIMDE_FLOAT64_C(-101153.68)}, + { { SIMDE_FLOAT64_C(-446810.13) }, + { SIMDE_FLOAT64_C(-275424.67) }, + { SIMDE_FLOAT64_C(453798.55) } } }, + { { SIMDE_FLOAT64_C(183424.28), SIMDE_FLOAT64_C(115671.75), SIMDE_FLOAT64_C(165990.60) }, + { SIMDE_FLOAT64_C(-460186.50), SIMDE_FLOAT64_C(408473.34), SIMDE_FLOAT64_C(85833.78)}, + { { SIMDE_FLOAT64_C(183424.28) }, + { SIMDE_FLOAT64_C(115671.75) }, + { SIMDE_FLOAT64_C(165990.60) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1x3_t r = 
simde_vld3_dup_f64(test_vec[i].a); + simde_float64x1x3_t expected = { + {simde_vld1_f64(test_vec[i].r[0]), simde_vld1_f64(test_vec[i].r[1]), simde_vld1_f64(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3_dup_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[3]; + int8_t unused[3]; + int8_t r[3][8]; + } test_vec[] = { + { { INT8_C(55), -INT8_C(67), INT8_C(34) }, + { -INT8_C(4), INT8_C(29), INT8_C(62)}, + { { INT8_C(55), INT8_C(55), INT8_C(55), INT8_C(55), + INT8_C(55), INT8_C(55), INT8_C(55), INT8_C(55) }, + { -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), + -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67) }, + { INT8_C(34), INT8_C(34), INT8_C(34), INT8_C(34), + INT8_C(34), INT8_C(34), INT8_C(34), INT8_C(34) } } }, + { { INT8_C(58), INT8_C(53), INT8_C(86) }, + { INT8_C(5), INT8_C(6), -INT8_C(5)}, + { { INT8_C(58), INT8_C(58), INT8_C(58), INT8_C(58), + INT8_C(58), INT8_C(58), INT8_C(58), INT8_C(58) }, + { INT8_C(53), INT8_C(53), INT8_C(53), INT8_C(53), + INT8_C(53), INT8_C(53), INT8_C(53), INT8_C(53) }, + { INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), + INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86) } } }, + { { INT8_C(92), -INT8_C(1), -INT8_C(94) }, + { -INT8_C(6), INT8_C(61), -INT8_C(61)}, + { { INT8_C(92), INT8_C(92), INT8_C(92), INT8_C(92), + INT8_C(92), INT8_C(92), INT8_C(92), INT8_C(92) }, + { -INT8_C(1), -INT8_C(1), -INT8_C(1), -INT8_C(1), + -INT8_C(1), -INT8_C(1), -INT8_C(1), -INT8_C(1) }, + { -INT8_C(94), -INT8_C(94), -INT8_C(94), -INT8_C(94), + -INT8_C(94), -INT8_C(94), -INT8_C(94), -INT8_C(94) } } }, + { { INT8_C(99), INT8_C(89), -INT8_C(30) }, + { -INT8_C(83), INT8_C(36), INT8_C(27)}, + { { INT8_C(99), INT8_C(99), INT8_C(99), INT8_C(99), + INT8_C(99), INT8_C(99), INT8_C(99), 
INT8_C(99) }, + { INT8_C(89), INT8_C(89), INT8_C(89), INT8_C(89), + INT8_C(89), INT8_C(89), INT8_C(89), INT8_C(89) }, + { -INT8_C(30), -INT8_C(30), -INT8_C(30), -INT8_C(30), + -INT8_C(30), -INT8_C(30), -INT8_C(30), -INT8_C(30) } } }, + { { -INT8_C(92), INT8_C(73), INT8_C(70) }, + { -INT8_C(33), -INT8_C(90), -INT8_C(72)}, + { { -INT8_C(92), -INT8_C(92), -INT8_C(92), -INT8_C(92), + -INT8_C(92), -INT8_C(92), -INT8_C(92), -INT8_C(92) }, + { INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), + INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73) }, + { INT8_C(70), INT8_C(70), INT8_C(70), INT8_C(70), + INT8_C(70), INT8_C(70), INT8_C(70), INT8_C(70) } } }, + { { -INT8_C(93), -INT8_C(78), INT8_C(35) }, + { -INT8_C(20), -INT8_C(43), INT8_C(12)}, + { { -INT8_C(93), -INT8_C(93), -INT8_C(93), -INT8_C(93), + -INT8_C(93), -INT8_C(93), -INT8_C(93), -INT8_C(93) }, + { -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), + -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78) }, + { INT8_C(35), INT8_C(35), INT8_C(35), INT8_C(35), + INT8_C(35), INT8_C(35), INT8_C(35), INT8_C(35) } } }, + { { -INT8_C(97), INT8_C(19), -INT8_C(95) }, + { INT8_C(10), INT8_C(91), -INT8_C(14)}, + { { -INT8_C(97), -INT8_C(97), -INT8_C(97), -INT8_C(97), + -INT8_C(97), -INT8_C(97), -INT8_C(97), -INT8_C(97) }, + { INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), + INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19) }, + { -INT8_C(95), -INT8_C(95), -INT8_C(95), -INT8_C(95), + -INT8_C(95), -INT8_C(95), -INT8_C(95), -INT8_C(95) } } }, + { { INT8_C(36), -INT8_C(24), -INT8_C(59) }, + { -INT8_C(78), INT8_C(76), INT8_C(53)}, + { { INT8_C(36), INT8_C(36), INT8_C(36), INT8_C(36), + INT8_C(36), INT8_C(36), INT8_C(36), INT8_C(36) }, + { -INT8_C(24), -INT8_C(24), -INT8_C(24), -INT8_C(24), + -INT8_C(24), -INT8_C(24), -INT8_C(24), -INT8_C(24) }, + { -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), + -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
+ simde_int8x8x3_t r = simde_vld3_dup_s8(test_vec[i].a); + simde_int8x8x3_t expected = { + {simde_vld1_s8(test_vec[i].r[0]), simde_vld1_s8(test_vec[i].r[1]), simde_vld1_s8(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[3]; + int16_t unused[3]; + int16_t r[3][4]; + } test_vec[] = { + { { -INT16_C(2921), INT16_C(8854), -INT16_C(1686) }, + { -INT16_C(4075), -INT16_C(1501), INT16_C(1121)}, + { { -INT16_C(2921), -INT16_C(2921), -INT16_C(2921), -INT16_C(2921) }, + { INT16_C(8854), INT16_C(8854), INT16_C(8854), INT16_C(8854) }, + { -INT16_C(1686), -INT16_C(1686), -INT16_C(1686), -INT16_C(1686) } } }, + { { -INT16_C(6896), INT16_C(2743), -INT16_C(1644) }, + { -INT16_C(2845), -INT16_C(6251), INT16_C(9882)}, + { { -INT16_C(6896), -INT16_C(6896), -INT16_C(6896), -INT16_C(6896) }, + { INT16_C(2743), INT16_C(2743), INT16_C(2743), INT16_C(2743) }, + { -INT16_C(1644), -INT16_C(1644), -INT16_C(1644), -INT16_C(1644) } } }, + { { INT16_C(7425), INT16_C(1407), -INT16_C(9810) }, + { INT16_C(5827), INT16_C(7010), INT16_C(2508)}, + { { INT16_C(7425), INT16_C(7425), INT16_C(7425), INT16_C(7425) }, + { INT16_C(1407), INT16_C(1407), INT16_C(1407), INT16_C(1407) }, + { -INT16_C(9810), -INT16_C(9810), -INT16_C(9810), -INT16_C(9810) } } }, + { { -INT16_C(5814), INT16_C(168), -INT16_C(2609) }, + { INT16_C(7740), INT16_C(5018), -INT16_C(7738)}, + { { -INT16_C(5814), -INT16_C(5814), -INT16_C(5814), -INT16_C(5814) }, + { INT16_C(168), INT16_C(168), INT16_C(168), INT16_C(168) }, + { -INT16_C(2609), -INT16_C(2609), -INT16_C(2609), -INT16_C(2609) } } }, + { { INT16_C(4688), -INT16_C(4953), -INT16_C(1696) }, + { INT16_C(7371), -INT16_C(9194), INT16_C(8018)}, + { { INT16_C(4688), INT16_C(4688), 
INT16_C(4688), INT16_C(4688) }, + { -INT16_C(4953), -INT16_C(4953), -INT16_C(4953), -INT16_C(4953) }, + { -INT16_C(1696), -INT16_C(1696), -INT16_C(1696), -INT16_C(1696) } } }, + { { INT16_C(5322), -INT16_C(3475), -INT16_C(1646) }, + { INT16_C(3260), INT16_C(9616), -INT16_C(7563)}, + { { INT16_C(5322), INT16_C(5322), INT16_C(5322), INT16_C(5322) }, + { -INT16_C(3475), -INT16_C(3475), -INT16_C(3475), -INT16_C(3475) }, + { -INT16_C(1646), -INT16_C(1646), -INT16_C(1646), -INT16_C(1646) } } }, + { { INT16_C(1866), INT16_C(5035), INT16_C(6718) }, + { -INT16_C(3904), INT16_C(8154), INT16_C(3705)}, + { { INT16_C(1866), INT16_C(1866), INT16_C(1866), INT16_C(1866) }, + { INT16_C(5035), INT16_C(5035), INT16_C(5035), INT16_C(5035) }, + { INT16_C(6718), INT16_C(6718), INT16_C(6718), INT16_C(6718) } } }, + { { INT16_C(2449), INT16_C(2426), -INT16_C(4109) }, + { -INT16_C(3807), INT16_C(2524), -INT16_C(585)}, + { { INT16_C(2449), INT16_C(2449), INT16_C(2449), INT16_C(2449) }, + { INT16_C(2426), INT16_C(2426), INT16_C(2426), INT16_C(2426) }, + { -INT16_C(4109), -INT16_C(4109), -INT16_C(4109), -INT16_C(4109) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4x3_t r = simde_vld3_dup_s16(test_vec[i].a); + simde_int16x4x3_t expected = { + {simde_vld1_s16(test_vec[i].r[0]), simde_vld1_s16(test_vec[i].r[1]), simde_vld1_s16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i16x4(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[3]; + int32_t unused[3]; + int32_t r[3][2]; + } test_vec[] = { + { { INT32_C(9359), -INT32_C(791754), INT32_C(780198) }, + { -INT32_C(692894), -INT32_C(601710), INT32_C(160090)}, + { { INT32_C(9359), INT32_C(9359) }, + { -INT32_C(791754), -INT32_C(791754) }, + { 
INT32_C(780198), INT32_C(780198) } } }, + { { -INT32_C(445729), INT32_C(492254), INT32_C(729801) }, + { INT32_C(172233), -INT32_C(996411), INT32_C(527512)}, + { { -INT32_C(445729), -INT32_C(445729) }, + { INT32_C(492254), INT32_C(492254) }, + { INT32_C(729801), INT32_C(729801) } } }, + { { -INT32_C(80447), -INT32_C(632170), INT32_C(712178) }, + { INT32_C(644121), INT32_C(399198), INT32_C(706984)}, + { { -INT32_C(80447), -INT32_C(80447) }, + { -INT32_C(632170), -INT32_C(632170) }, + { INT32_C(712178), INT32_C(712178) } } }, + { { -INT32_C(181596), INT32_C(493128), -INT32_C(330413) }, + { -INT32_C(510772), -INT32_C(439183), INT32_C(228483)}, + { { -INT32_C(181596), -INT32_C(181596) }, + { INT32_C(493128), INT32_C(493128) }, + { -INT32_C(330413), -INT32_C(330413) } } }, + { { INT32_C(415181), -INT32_C(745887), -INT32_C(767306) }, + { INT32_C(843741), INT32_C(249059), -INT32_C(35530)}, + { { INT32_C(415181), INT32_C(415181) }, + { -INT32_C(745887), -INT32_C(745887) }, + { -INT32_C(767306), -INT32_C(767306) } } }, + { { INT32_C(264934), -INT32_C(922140), -INT32_C(924397) }, + { -INT32_C(823933), INT32_C(333243), -INT32_C(862050)}, + { { INT32_C(264934), INT32_C(264934) }, + { -INT32_C(922140), -INT32_C(922140) }, + { -INT32_C(924397), -INT32_C(924397) } } }, + { { -INT32_C(211892), -INT32_C(108610), INT32_C(775205) }, + { -INT32_C(732876), INT32_C(220414), -INT32_C(39935)}, + { { -INT32_C(211892), -INT32_C(211892) }, + { -INT32_C(108610), -INT32_C(108610) }, + { INT32_C(775205), INT32_C(775205) } } }, + { { INT32_C(52472), INT32_C(404931), -INT32_C(266802) }, + { INT32_C(192308), INT32_C(487453), INT32_C(771119)}, + { { INT32_C(52472), INT32_C(52472) }, + { INT32_C(404931), INT32_C(404931) }, + { -INT32_C(266802), -INT32_C(266802) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2x3_t r = simde_vld3_dup_s32(test_vec[i].a); + simde_int32x2x3_t expected = { + {simde_vld1_s32(test_vec[i].r[0]), 
simde_vld1_s32(test_vec[i].r[1]), simde_vld1_s32(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i32x2(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[3]; + int64_t unused[3]; + int64_t r[3][1]; + } test_vec[] = { + { { INT64_C(51518731), -INT64_C(53716248), -INT64_C(89452262) }, + { INT64_C(84707352), -INT64_C(86842244), -INT64_C(80580693)}, + { { INT64_C(51518731) }, + { -INT64_C(53716248) }, + { -INT64_C(89452262) } } }, + { { -INT64_C(44610849), -INT64_C(52160709), -INT64_C(95346428) }, + { INT64_C(72202015), INT64_C(35144772), -INT64_C(29764090)}, + { { -INT64_C(44610849) }, + { -INT64_C(52160709) }, + { -INT64_C(95346428) } } }, + { { -INT64_C(13186336), INT64_C(44857075), -INT64_C(85282798) }, + { INT64_C(56936164), INT64_C(65920241), INT64_C(30317650)}, + { { -INT64_C(13186336) }, + { INT64_C(44857075) }, + { -INT64_C(85282798) } } }, + { { -INT64_C(52586359), -INT64_C(72094720), INT64_C(86111358) }, + { INT64_C(38491545), -INT64_C(41719422), INT64_C(26443629)}, + { { -INT64_C(52586359) }, + { -INT64_C(72094720) }, + { INT64_C(86111358) } } }, + { { -INT64_C(7491042), INT64_C(65098523), INT64_C(45614065) }, + { -INT64_C(34048766), INT64_C(32766230), -INT64_C(19050598)}, + { { -INT64_C(7491042) }, + { INT64_C(65098523) }, + { INT64_C(45614065) } } }, + { { -INT64_C(15626393), -INT64_C(90534831), INT64_C(77179247) }, + { INT64_C(66256354), INT64_C(28944347), -INT64_C(58272864)}, + { { -INT64_C(15626393) }, + { -INT64_C(90534831) }, + { INT64_C(77179247) } } }, + { { -INT64_C(591805), -INT64_C(23226575), -INT64_C(94673457) }, + { INT64_C(92387981), -INT64_C(78597599), INT64_C(71171711)}, + { { -INT64_C(591805) }, + { -INT64_C(23226575) }, + { -INT64_C(94673457) } } }, + { { -INT64_C(22331289), INT64_C(63626465), 
-INT64_C(59469151) }, + { -INT64_C(50710329), INT64_C(52199199), INT64_C(34000198)}, + { { -INT64_C(22331289) }, + { INT64_C(63626465) }, + { -INT64_C(59469151) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1x3_t r = simde_vld3_dup_s64(test_vec[i].a); + simde_int64x1x3_t expected = { + {simde_vld1_s64(test_vec[i].r[0]), simde_vld1_s64(test_vec[i].r[1]), simde_vld1_s64(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i64x1(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[3]; + uint8_t unused[3]; + uint8_t r[3][8]; + } test_vec[] = { + { { UINT8_C(135), UINT8_C(154), UINT8_C(34) }, + { UINT8_C(4), UINT8_C(55), UINT8_C(125)}, + { { UINT8_C(135), UINT8_C(135), UINT8_C(135), UINT8_C(135), + UINT8_C(135), UINT8_C(135), UINT8_C(135), UINT8_C(135) }, + { UINT8_C(154), UINT8_C(154), UINT8_C(154), UINT8_C(154), + UINT8_C(154), UINT8_C(154), UINT8_C(154), UINT8_C(154) }, + { UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), + UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34) } } }, + { { UINT8_C(182), UINT8_C(2), UINT8_C(184) }, + { UINT8_C(165), UINT8_C(123), UINT8_C(191)}, + { { UINT8_C(182), UINT8_C(182), UINT8_C(182), UINT8_C(182), + UINT8_C(182), UINT8_C(182), UINT8_C(182), UINT8_C(182) }, + { UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), + UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2) }, + { UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), + UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184) } } }, + { { UINT8_C(127), UINT8_C(64), UINT8_C(31) }, + { UINT8_C(41), UINT8_C(77), UINT8_C(44)}, + { { UINT8_C(127), UINT8_C(127), UINT8_C(127), UINT8_C(127), + UINT8_C(127), UINT8_C(127), UINT8_C(127), UINT8_C(127) }, + { UINT8_C(64), UINT8_C(64), UINT8_C(64), 
UINT8_C(64), + UINT8_C(64), UINT8_C(64), UINT8_C(64), UINT8_C(64) }, + { UINT8_C(31), UINT8_C(31), UINT8_C(31), UINT8_C(31), + UINT8_C(31), UINT8_C(31), UINT8_C(31), UINT8_C(31) } } }, + { { UINT8_C(95), UINT8_C(134), UINT8_C(151) }, + { UINT8_C(30), UINT8_C(126), UINT8_C(11)}, + { { UINT8_C(95), UINT8_C(95), UINT8_C(95), UINT8_C(95), + UINT8_C(95), UINT8_C(95), UINT8_C(95), UINT8_C(95) }, + { UINT8_C(134), UINT8_C(134), UINT8_C(134), UINT8_C(134), + UINT8_C(134), UINT8_C(134), UINT8_C(134), UINT8_C(134) }, + { UINT8_C(151), UINT8_C(151), UINT8_C(151), UINT8_C(151), + UINT8_C(151), UINT8_C(151), UINT8_C(151), UINT8_C(151) } } }, + { { UINT8_C(198), UINT8_C(171), UINT8_C(185) }, + { UINT8_C(44), UINT8_C(68), UINT8_C(53)}, + { { UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), + UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198) }, + { UINT8_C(171), UINT8_C(171), UINT8_C(171), UINT8_C(171), + UINT8_C(171), UINT8_C(171), UINT8_C(171), UINT8_C(171) }, + { UINT8_C(185), UINT8_C(185), UINT8_C(185), UINT8_C(185), + UINT8_C(185), UINT8_C(185), UINT8_C(185), UINT8_C(185) } } }, + { { UINT8_C(149), UINT8_C(156), UINT8_C(25) }, + { UINT8_C(131), UINT8_C(33), UINT8_C(82)}, + { { UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), + UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149) }, + { UINT8_C(156), UINT8_C(156), UINT8_C(156), UINT8_C(156), + UINT8_C(156), UINT8_C(156), UINT8_C(156), UINT8_C(156) }, + { UINT8_C(25), UINT8_C(25), UINT8_C(25), UINT8_C(25), + UINT8_C(25), UINT8_C(25), UINT8_C(25), UINT8_C(25) } } }, + { { UINT8_C(168), UINT8_C(104), UINT8_C(156) }, + { UINT8_C(82), UINT8_C(180), UINT8_C(74)}, + { { UINT8_C(168), UINT8_C(168), UINT8_C(168), UINT8_C(168), + UINT8_C(168), UINT8_C(168), UINT8_C(168), UINT8_C(168) }, + { UINT8_C(104), UINT8_C(104), UINT8_C(104), UINT8_C(104), + UINT8_C(104), UINT8_C(104), UINT8_C(104), UINT8_C(104) }, + { UINT8_C(156), UINT8_C(156), UINT8_C(156), UINT8_C(156), + UINT8_C(156), UINT8_C(156), UINT8_C(156), 
UINT8_C(156) } } }, + { { UINT8_C(20), UINT8_C(9), UINT8_C(108) }, + { UINT8_C(197), UINT8_C(40), UINT8_C(191)}, + { { UINT8_C(20), UINT8_C(20), UINT8_C(20), UINT8_C(20), + UINT8_C(20), UINT8_C(20), UINT8_C(20), UINT8_C(20) }, + { UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), + UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9) }, + { UINT8_C(108), UINT8_C(108), UINT8_C(108), UINT8_C(108), + UINT8_C(108), UINT8_C(108), UINT8_C(108), UINT8_C(108) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8x3_t r = simde_vld3_dup_u8(test_vec[i].a); + simde_uint8x8x3_t expected = { + {simde_vld1_u8(test_vec[i].r[0]), simde_vld1_u8(test_vec[i].r[1]), simde_vld1_u8(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u8x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[3]; + uint16_t unused[3]; + uint16_t r[3][4]; + } test_vec[] = { + { { UINT16_C(13377), UINT16_C(10805), UINT16_C(14630) }, + { UINT16_C(10222), UINT16_C(12690), UINT16_C(19072)}, + { { UINT16_C(13377), UINT16_C(13377), UINT16_C(13377), UINT16_C(13377) }, + { UINT16_C(10805), UINT16_C(10805), UINT16_C(10805), UINT16_C(10805) }, + { UINT16_C(14630), UINT16_C(14630), UINT16_C(14630), UINT16_C(14630) } } }, + { { UINT16_C(8093), UINT16_C(11570), UINT16_C(183) }, + { UINT16_C(14974), UINT16_C(2867), UINT16_C(19983)}, + { { UINT16_C(8093), UINT16_C(8093), UINT16_C(8093), UINT16_C(8093) }, + { UINT16_C(11570), UINT16_C(11570), UINT16_C(11570), UINT16_C(11570) }, + { UINT16_C(183), UINT16_C(183), UINT16_C(183), UINT16_C(183) } } }, + { { UINT16_C(51), UINT16_C(11622), UINT16_C(2412) }, + { UINT16_C(186), UINT16_C(13191), UINT16_C(7620)}, + { { UINT16_C(51), UINT16_C(51), UINT16_C(51), UINT16_C(51) }, + { UINT16_C(11622), 
UINT16_C(11622), UINT16_C(11622), UINT16_C(11622) }, + { UINT16_C(2412), UINT16_C(2412), UINT16_C(2412), UINT16_C(2412) } } }, + { { UINT16_C(1319), UINT16_C(3015), UINT16_C(17065) }, + { UINT16_C(2405), UINT16_C(6666), UINT16_C(16892)}, + { { UINT16_C(1319), UINT16_C(1319), UINT16_C(1319), UINT16_C(1319) }, + { UINT16_C(3015), UINT16_C(3015), UINT16_C(3015), UINT16_C(3015) }, + { UINT16_C(17065), UINT16_C(17065), UINT16_C(17065), UINT16_C(17065) } } }, + { { UINT16_C(5845), UINT16_C(17414), UINT16_C(9474) }, + { UINT16_C(17463), UINT16_C(1796), UINT16_C(5656)}, + { { UINT16_C(5845), UINT16_C(5845), UINT16_C(5845), UINT16_C(5845) }, + { UINT16_C(17414), UINT16_C(17414), UINT16_C(17414), UINT16_C(17414) }, + { UINT16_C(9474), UINT16_C(9474), UINT16_C(9474), UINT16_C(9474) } } }, + { { UINT16_C(4232), UINT16_C(1927), UINT16_C(15662) }, + { UINT16_C(4302), UINT16_C(2477), UINT16_C(16338)}, + { { UINT16_C(4232), UINT16_C(4232), UINT16_C(4232), UINT16_C(4232) }, + { UINT16_C(1927), UINT16_C(1927), UINT16_C(1927), UINT16_C(1927) }, + { UINT16_C(15662), UINT16_C(15662), UINT16_C(15662), UINT16_C(15662) } } }, + { { UINT16_C(2091), UINT16_C(9041), UINT16_C(5199) }, + { UINT16_C(2392), UINT16_C(11870), UINT16_C(18327)}, + { { UINT16_C(2091), UINT16_C(2091), UINT16_C(2091), UINT16_C(2091) }, + { UINT16_C(9041), UINT16_C(9041), UINT16_C(9041), UINT16_C(9041) }, + { UINT16_C(5199), UINT16_C(5199), UINT16_C(5199), UINT16_C(5199) } } }, + { { UINT16_C(9669), UINT16_C(11054), UINT16_C(16736) }, + { UINT16_C(8412), UINT16_C(8721), UINT16_C(13754)}, + { { UINT16_C(9669), UINT16_C(9669), UINT16_C(9669), UINT16_C(9669) }, + { UINT16_C(11054), UINT16_C(11054), UINT16_C(11054), UINT16_C(11054) }, + { UINT16_C(16736), UINT16_C(16736), UINT16_C(16736), UINT16_C(16736) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4x3_t r = simde_vld3_dup_u16(test_vec[i].a); + simde_uint16x4x3_t expected = { + {simde_vld1_u16(test_vec[i].r[0]), 
simde_vld1_u16(test_vec[i].r[1]), simde_vld1_u16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u16x4(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[3]; + uint32_t unused[3]; + uint32_t r[3][2]; + } test_vec[] = { + { { UINT32_C(395765), UINT32_C(1580157), UINT32_C(1297063) }, + { UINT32_C(437858), UINT32_C(950362), UINT32_C(611421)}, + { { UINT32_C(395765), UINT32_C(395765) }, + { UINT32_C(1580157), UINT32_C(1580157) }, + { UINT32_C(1297063), UINT32_C(1297063) } } }, + { { UINT32_C(795810), UINT32_C(971010), UINT32_C(508108) }, + { UINT32_C(99068), UINT32_C(1593779), UINT32_C(1092559)}, + { { UINT32_C(795810), UINT32_C(795810) }, + { UINT32_C(971010), UINT32_C(971010) }, + { UINT32_C(508108), UINT32_C(508108) } } }, + { { UINT32_C(851511), UINT32_C(1871084), UINT32_C(1077984) }, + { UINT32_C(441102), UINT32_C(921978), UINT32_C(185864)}, + { { UINT32_C(851511), UINT32_C(851511) }, + { UINT32_C(1871084), UINT32_C(1871084) }, + { UINT32_C(1077984), UINT32_C(1077984) } } }, + { { UINT32_C(44627), UINT32_C(755307), UINT32_C(1310678) }, + { UINT32_C(1692632), UINT32_C(1730487), UINT32_C(33593)}, + { { UINT32_C(44627), UINT32_C(44627) }, + { UINT32_C(755307), UINT32_C(755307) }, + { UINT32_C(1310678), UINT32_C(1310678) } } }, + { { UINT32_C(1123068), UINT32_C(1764923), UINT32_C(135672) }, + { UINT32_C(748360), UINT32_C(1811211), UINT32_C(237332)}, + { { UINT32_C(1123068), UINT32_C(1123068) }, + { UINT32_C(1764923), UINT32_C(1764923) }, + { UINT32_C(135672), UINT32_C(135672) } } }, + { { UINT32_C(1762919), UINT32_C(212442), UINT32_C(403640) }, + { UINT32_C(95742), UINT32_C(870777), UINT32_C(1775043)}, + { { UINT32_C(1762919), UINT32_C(1762919) }, + { UINT32_C(212442), UINT32_C(212442) }, + { UINT32_C(403640), 
UINT32_C(403640) } } }, + { { UINT32_C(838356), UINT32_C(1713991), UINT32_C(1501513) }, + { UINT32_C(1515420), UINT32_C(835038), UINT32_C(322935)}, + { { UINT32_C(838356), UINT32_C(838356) }, + { UINT32_C(1713991), UINT32_C(1713991) }, + { UINT32_C(1501513), UINT32_C(1501513) } } }, + { { UINT32_C(295067), UINT32_C(128462), UINT32_C(205058) }, + { UINT32_C(758571), UINT32_C(757108), UINT32_C(669760)}, + { { UINT32_C(295067), UINT32_C(295067) }, + { UINT32_C(128462), UINT32_C(128462) }, + { UINT32_C(205058), UINT32_C(205058) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2x3_t r = simde_vld3_dup_u32(test_vec[i].a); + simde_uint32x2x3_t expected = { + {simde_vld1_u32(test_vec[i].r[0]), simde_vld1_u32(test_vec[i].r[1]), simde_vld1_u32(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u32x2(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[3]; + uint64_t unused[3]; + uint64_t r[3][1]; + } test_vec[] = { + { { UINT64_C(160975844), UINT64_C(153508503), UINT64_C(173493973) }, + { UINT64_C(166251921), UINT64_C(65830676), UINT64_C(97239677)}, + { { UINT64_C(160975844) }, + { UINT64_C(153508503) }, + { UINT64_C(173493973) } } }, + { { UINT64_C(187315367), UINT64_C(5800842), UINT64_C(115739498) }, + { UINT64_C(84653812), UINT64_C(120517420), UINT64_C(175798293)}, + { { UINT64_C(187315367) }, + { UINT64_C(5800842) }, + { UINT64_C(115739498) } } }, + { { UINT64_C(76024341), UINT64_C(155566333), UINT64_C(101038279) }, + { UINT64_C(46074453), UINT64_C(151593559), UINT64_C(39262253)}, + { { UINT64_C(76024341) }, + { UINT64_C(155566333) }, + { UINT64_C(101038279) } } }, + { { UINT64_C(46906477), UINT64_C(997782), UINT64_C(69721610) }, + { UINT64_C(31134385), UINT64_C(173251253), 
UINT64_C(151509065)}, + { { UINT64_C(46906477) }, + { UINT64_C(997782) }, + { UINT64_C(69721610) } } }, + { { UINT64_C(59105148), UINT64_C(194933961), UINT64_C(18072130) }, + { UINT64_C(67463877), UINT64_C(80370119), UINT64_C(107818223)}, + { { UINT64_C(59105148) }, + { UINT64_C(194933961) }, + { UINT64_C(18072130) } } }, + { { UINT64_C(187736185), UINT64_C(161160447), UINT64_C(24909245) }, + { UINT64_C(110288501), UINT64_C(110076399), UINT64_C(184478722)}, + { { UINT64_C(187736185) }, + { UINT64_C(161160447) }, + { UINT64_C(24909245) } } }, + { { UINT64_C(113683975), UINT64_C(71734803), UINT64_C(50110366) }, + { UINT64_C(20964634), UINT64_C(167646318), UINT64_C(171677397)}, + { { UINT64_C(113683975) }, + { UINT64_C(71734803) }, + { UINT64_C(50110366) } } }, + { { UINT64_C(5715707), UINT64_C(147794026), UINT64_C(162515415) }, + { UINT64_C(142055920), UINT64_C(10766156), UINT64_C(64823110)}, + { { UINT64_C(5715707) }, + { UINT64_C(147794026) }, + { UINT64_C(162515415) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1x3_t r = simde_vld3_dup_u64(test_vec[i].a); + simde_uint64x1x3_t expected = { + {simde_vld1_u64(test_vec[i].r[0]), simde_vld1_u64(test_vec[i].r[1]), simde_vld1_u64(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u64x1(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[3]; + simde_float16_t unused[3]; + simde_float16_t r[3][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(46.17) }, + { SIMDE_FLOAT16_VALUE(29.32), SIMDE_FLOAT16_VALUE(-18.79), SIMDE_FLOAT16_VALUE(30.83)}, + { { SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(20.19), + 
SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(20.19), SIMDE_FLOAT16_VALUE(20.19) }, + { SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(16.39), + SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(16.39), SIMDE_FLOAT16_VALUE(16.39) }, + { SIMDE_FLOAT16_VALUE(46.17), SIMDE_FLOAT16_VALUE(46.17), SIMDE_FLOAT16_VALUE(46.17), SIMDE_FLOAT16_VALUE(46.17), + SIMDE_FLOAT16_VALUE(46.17), SIMDE_FLOAT16_VALUE(46.17), SIMDE_FLOAT16_VALUE(46.17), SIMDE_FLOAT16_VALUE(46.17) } } }, + { { SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-45.20) }, + { SIMDE_FLOAT16_VALUE(6.72), SIMDE_FLOAT16_VALUE(10.70), SIMDE_FLOAT16_VALUE(32.09)}, + { { SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-48.67), + SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-48.67), SIMDE_FLOAT16_VALUE(-48.67) }, + { SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-21.20), + SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-21.20) }, + { SIMDE_FLOAT16_VALUE(-45.20), SIMDE_FLOAT16_VALUE(-45.20), SIMDE_FLOAT16_VALUE(-45.20), SIMDE_FLOAT16_VALUE(-45.20), + SIMDE_FLOAT16_VALUE(-45.20), SIMDE_FLOAT16_VALUE(-45.20), SIMDE_FLOAT16_VALUE(-45.20), SIMDE_FLOAT16_VALUE(-45.20) } } }, + { { SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(24.77), SIMDE_FLOAT16_VALUE(7.75) }, + { SIMDE_FLOAT16_VALUE(-19.50), SIMDE_FLOAT16_VALUE(-45.53), SIMDE_FLOAT16_VALUE(21.67)}, + { { SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-29.85), + SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-29.85), SIMDE_FLOAT16_VALUE(-29.85) }, + { SIMDE_FLOAT16_VALUE(24.77), SIMDE_FLOAT16_VALUE(24.77), 
SIMDE_FLOAT16_VALUE(24.77), SIMDE_FLOAT16_VALUE(24.77), + SIMDE_FLOAT16_VALUE(24.77), SIMDE_FLOAT16_VALUE(24.77), SIMDE_FLOAT16_VALUE(24.77), SIMDE_FLOAT16_VALUE(24.77) }, + { SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(7.75), + SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(7.75) } } }, + { { SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(45.00) }, + { SIMDE_FLOAT16_VALUE(2.89), SIMDE_FLOAT16_VALUE(7.87), SIMDE_FLOAT16_VALUE(24.69)}, + { { SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-4.91), + SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-4.91), SIMDE_FLOAT16_VALUE(-4.91) }, + { SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(-25.17), + SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(-25.17), SIMDE_FLOAT16_VALUE(-25.17) }, + { SIMDE_FLOAT16_VALUE(45.00), SIMDE_FLOAT16_VALUE(45.00), SIMDE_FLOAT16_VALUE(45.00), SIMDE_FLOAT16_VALUE(45.00), + SIMDE_FLOAT16_VALUE(45.00), SIMDE_FLOAT16_VALUE(45.00), SIMDE_FLOAT16_VALUE(45.00), SIMDE_FLOAT16_VALUE(45.00) } } }, + { { SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(44.40) }, + { SIMDE_FLOAT16_VALUE(-8.68), SIMDE_FLOAT16_VALUE(49.28), SIMDE_FLOAT16_VALUE(49.67)}, + { { SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-47.11), + SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-47.11), SIMDE_FLOAT16_VALUE(-47.11) }, + { SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(-3.75), + SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(-3.75), SIMDE_FLOAT16_VALUE(-3.75) }, + { SIMDE_FLOAT16_VALUE(44.40), 
SIMDE_FLOAT16_VALUE(44.40), SIMDE_FLOAT16_VALUE(44.40), SIMDE_FLOAT16_VALUE(44.40), + SIMDE_FLOAT16_VALUE(44.40), SIMDE_FLOAT16_VALUE(44.40), SIMDE_FLOAT16_VALUE(44.40), SIMDE_FLOAT16_VALUE(44.40) } } }, + { { SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(15.52) }, + { SIMDE_FLOAT16_VALUE(24.82), SIMDE_FLOAT16_VALUE(34.29), SIMDE_FLOAT16_VALUE(-0.76)}, + { { SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(-32.16), + SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(-32.16), SIMDE_FLOAT16_VALUE(-32.16) }, + { SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(30.51), + SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(30.51), SIMDE_FLOAT16_VALUE(30.51) }, + { SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(15.52), + SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(15.52) } } }, + { { SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(-43.38) }, + { SIMDE_FLOAT16_VALUE(3.24), SIMDE_FLOAT16_VALUE(-29.83), SIMDE_FLOAT16_VALUE(34.76)}, + { { SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(-15.98), + SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(-15.98), SIMDE_FLOAT16_VALUE(-15.98) }, + { SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.92), + SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.92) }, + { SIMDE_FLOAT16_VALUE(-43.38), SIMDE_FLOAT16_VALUE(-43.38), SIMDE_FLOAT16_VALUE(-43.38), SIMDE_FLOAT16_VALUE(-43.38), + SIMDE_FLOAT16_VALUE(-43.38), SIMDE_FLOAT16_VALUE(-43.38), SIMDE_FLOAT16_VALUE(-43.38), SIMDE_FLOAT16_VALUE(-43.38) } } }, + { { 
SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(-43.69) }, + { SIMDE_FLOAT16_VALUE(-47.12), SIMDE_FLOAT16_VALUE(-48.25), SIMDE_FLOAT16_VALUE(-34.42)}, + { { SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(34.74), + SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(34.74), SIMDE_FLOAT16_VALUE(34.74) }, + { SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(3.30), + SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(3.30) }, + { SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(-43.69), + SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(-43.69) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x3_t r = simde_vld3q_dup_f16(test_vec[i].a); + simde_float16x8x3_t expected = { + {simde_vld1q_f16(test_vec[i].r[0]), simde_vld1q_f16(test_vec[i].r[1]), simde_vld1q_f16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3q_dup_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[3]; + float unused[3]; + float r[3][4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(3697.78), SIMDE_FLOAT32_C(2264.54), SIMDE_FLOAT32_C(1602.33) }, + { SIMDE_FLOAT32_C(2604.95), SIMDE_FLOAT32_C(3726.11), SIMDE_FLOAT32_C(-1452.17)}, + { { SIMDE_FLOAT32_C(3697.78), SIMDE_FLOAT32_C(3697.78), SIMDE_FLOAT32_C(3697.78), SIMDE_FLOAT32_C(3697.78) }, + { SIMDE_FLOAT32_C(2264.54), SIMDE_FLOAT32_C(2264.54), SIMDE_FLOAT32_C(2264.54), SIMDE_FLOAT32_C(2264.54) }, + { 
SIMDE_FLOAT32_C(1602.33), SIMDE_FLOAT32_C(1602.33), SIMDE_FLOAT32_C(1602.33), SIMDE_FLOAT32_C(1602.33) } } }, + { { SIMDE_FLOAT32_C(2171.36), SIMDE_FLOAT32_C(-1728.35), SIMDE_FLOAT32_C(-1392.20) }, + { SIMDE_FLOAT32_C(-373.74), SIMDE_FLOAT32_C(-2139.62), SIMDE_FLOAT32_C(2081.32)}, + { { SIMDE_FLOAT32_C(2171.36), SIMDE_FLOAT32_C(2171.36), SIMDE_FLOAT32_C(2171.36), SIMDE_FLOAT32_C(2171.36) }, + { SIMDE_FLOAT32_C(-1728.35), SIMDE_FLOAT32_C(-1728.35), SIMDE_FLOAT32_C(-1728.35), SIMDE_FLOAT32_C(-1728.35) }, + { SIMDE_FLOAT32_C(-1392.20), SIMDE_FLOAT32_C(-1392.20), SIMDE_FLOAT32_C(-1392.20), SIMDE_FLOAT32_C(-1392.20) } } }, + { { SIMDE_FLOAT32_C(2884.54), SIMDE_FLOAT32_C(1531.32), SIMDE_FLOAT32_C(-2902.25) }, + { SIMDE_FLOAT32_C(-4095.36), SIMDE_FLOAT32_C(4395.19), SIMDE_FLOAT32_C(-4907.26)}, + { { SIMDE_FLOAT32_C(2884.54), SIMDE_FLOAT32_C(2884.54), SIMDE_FLOAT32_C(2884.54), SIMDE_FLOAT32_C(2884.54) }, + { SIMDE_FLOAT32_C(1531.32), SIMDE_FLOAT32_C(1531.32), SIMDE_FLOAT32_C(1531.32), SIMDE_FLOAT32_C(1531.32) }, + { SIMDE_FLOAT32_C(-2902.25), SIMDE_FLOAT32_C(-2902.25), SIMDE_FLOAT32_C(-2902.25), SIMDE_FLOAT32_C(-2902.25) } } }, + { { SIMDE_FLOAT32_C(174.64), SIMDE_FLOAT32_C(-2150.06), SIMDE_FLOAT32_C(2897.93) }, + { SIMDE_FLOAT32_C(-2988.20), SIMDE_FLOAT32_C(-575.59), SIMDE_FLOAT32_C(3191.50)}, + { { SIMDE_FLOAT32_C(174.64), SIMDE_FLOAT32_C(174.64), SIMDE_FLOAT32_C(174.64), SIMDE_FLOAT32_C(174.64) }, + { SIMDE_FLOAT32_C(-2150.06), SIMDE_FLOAT32_C(-2150.06), SIMDE_FLOAT32_C(-2150.06), SIMDE_FLOAT32_C(-2150.06) }, + { SIMDE_FLOAT32_C(2897.93), SIMDE_FLOAT32_C(2897.93), SIMDE_FLOAT32_C(2897.93), SIMDE_FLOAT32_C(2897.93) } } }, + { { SIMDE_FLOAT32_C(-3237.71), SIMDE_FLOAT32_C(3879.02), SIMDE_FLOAT32_C(4954.54) }, + { SIMDE_FLOAT32_C(3405.09), SIMDE_FLOAT32_C(224.82), SIMDE_FLOAT32_C(-180.79)}, + { { SIMDE_FLOAT32_C(-3237.71), SIMDE_FLOAT32_C(-3237.71), SIMDE_FLOAT32_C(-3237.71), SIMDE_FLOAT32_C(-3237.71) }, + { SIMDE_FLOAT32_C(3879.02), SIMDE_FLOAT32_C(3879.02), 
SIMDE_FLOAT32_C(3879.02), SIMDE_FLOAT32_C(3879.02) }, + { SIMDE_FLOAT32_C(4954.54), SIMDE_FLOAT32_C(4954.54), SIMDE_FLOAT32_C(4954.54), SIMDE_FLOAT32_C(4954.54) } } }, + { { SIMDE_FLOAT32_C(-1667.55), SIMDE_FLOAT32_C(-1961.29), SIMDE_FLOAT32_C(-4785.81) }, + { SIMDE_FLOAT32_C(-988.22), SIMDE_FLOAT32_C(645.67), SIMDE_FLOAT32_C(-4900.78)}, + { { SIMDE_FLOAT32_C(-1667.55), SIMDE_FLOAT32_C(-1667.55), SIMDE_FLOAT32_C(-1667.55), SIMDE_FLOAT32_C(-1667.55) }, + { SIMDE_FLOAT32_C(-1961.29), SIMDE_FLOAT32_C(-1961.29), SIMDE_FLOAT32_C(-1961.29), SIMDE_FLOAT32_C(-1961.29) }, + { SIMDE_FLOAT32_C(-4785.81), SIMDE_FLOAT32_C(-4785.81), SIMDE_FLOAT32_C(-4785.81), SIMDE_FLOAT32_C(-4785.81) } } }, + { { SIMDE_FLOAT32_C(-1617.37), SIMDE_FLOAT32_C(-3727.98), SIMDE_FLOAT32_C(1464.46) }, + { SIMDE_FLOAT32_C(-3143.52), SIMDE_FLOAT32_C(-3238.88), SIMDE_FLOAT32_C(2569.95)}, + { { SIMDE_FLOAT32_C(-1617.37), SIMDE_FLOAT32_C(-1617.37), SIMDE_FLOAT32_C(-1617.37), SIMDE_FLOAT32_C(-1617.37) }, + { SIMDE_FLOAT32_C(-3727.98), SIMDE_FLOAT32_C(-3727.98), SIMDE_FLOAT32_C(-3727.98), SIMDE_FLOAT32_C(-3727.98) }, + { SIMDE_FLOAT32_C(1464.46), SIMDE_FLOAT32_C(1464.46), SIMDE_FLOAT32_C(1464.46), SIMDE_FLOAT32_C(1464.46) } } }, + { { SIMDE_FLOAT32_C(896.32), SIMDE_FLOAT32_C(3033.53), SIMDE_FLOAT32_C(2717.79) }, + { SIMDE_FLOAT32_C(-2243.74), SIMDE_FLOAT32_C(2624.94), SIMDE_FLOAT32_C(4491.28)}, + { { SIMDE_FLOAT32_C(896.32), SIMDE_FLOAT32_C(896.32), SIMDE_FLOAT32_C(896.32), SIMDE_FLOAT32_C(896.32) }, + { SIMDE_FLOAT32_C(3033.53), SIMDE_FLOAT32_C(3033.53), SIMDE_FLOAT32_C(3033.53), SIMDE_FLOAT32_C(3033.53) }, + { SIMDE_FLOAT32_C(2717.79), SIMDE_FLOAT32_C(2717.79), SIMDE_FLOAT32_C(2717.79), SIMDE_FLOAT32_C(2717.79) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4x3_t r = simde_vld3q_dup_f32(test_vec[i].a); + simde_float32x4x3_t expected = { + {simde_vld1q_f32(test_vec[i].r[0]), simde_vld1q_f32(test_vec[i].r[1]), simde_vld1q_f32(test_vec[i].r[2])}}; + + 
simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3q_dup_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[3]; + simde_float64 unused[3]; + simde_float64 r[3][2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(-76188.51), SIMDE_FLOAT64_C(-158100.44), SIMDE_FLOAT64_C(450863.97) }, + { SIMDE_FLOAT64_C(-419497.68), SIMDE_FLOAT64_C(486477.94), SIMDE_FLOAT64_C(-161711.22)}, + { { SIMDE_FLOAT64_C(-76188.51), SIMDE_FLOAT64_C(-76188.51) }, + { SIMDE_FLOAT64_C(-158100.44), SIMDE_FLOAT64_C(-158100.44) }, + { SIMDE_FLOAT64_C(450863.97), SIMDE_FLOAT64_C(450863.97) } } }, + { { SIMDE_FLOAT64_C(-311999.81), SIMDE_FLOAT64_C(-133166.59), SIMDE_FLOAT64_C(19751.09) }, + { SIMDE_FLOAT64_C(381296.77), SIMDE_FLOAT64_C(4280.17), SIMDE_FLOAT64_C(200621.38)}, + { { SIMDE_FLOAT64_C(-311999.81), SIMDE_FLOAT64_C(-311999.81) }, + { SIMDE_FLOAT64_C(-133166.59), SIMDE_FLOAT64_C(-133166.59) }, + { SIMDE_FLOAT64_C(19751.09), SIMDE_FLOAT64_C(19751.09) } } }, + { { SIMDE_FLOAT64_C(431577.95), SIMDE_FLOAT64_C(77634.91), SIMDE_FLOAT64_C(266670.63) }, + { SIMDE_FLOAT64_C(53.33), SIMDE_FLOAT64_C(-384944.02), SIMDE_FLOAT64_C(283777.76)}, + { { SIMDE_FLOAT64_C(431577.95), SIMDE_FLOAT64_C(431577.95) }, + { SIMDE_FLOAT64_C(77634.91), SIMDE_FLOAT64_C(77634.91) }, + { SIMDE_FLOAT64_C(266670.63), SIMDE_FLOAT64_C(266670.63) } } }, + { { SIMDE_FLOAT64_C(-158517.04), SIMDE_FLOAT64_C(-133983.50), SIMDE_FLOAT64_C(338367.02) }, + { SIMDE_FLOAT64_C(-188683.95), SIMDE_FLOAT64_C(-130665.38), SIMDE_FLOAT64_C(393704.35)}, + { { SIMDE_FLOAT64_C(-158517.04), SIMDE_FLOAT64_C(-158517.04) }, + { SIMDE_FLOAT64_C(-133983.50), SIMDE_FLOAT64_C(-133983.50) }, + { SIMDE_FLOAT64_C(338367.02), SIMDE_FLOAT64_C(338367.02) } } }, + { { SIMDE_FLOAT64_C(-304331.98), SIMDE_FLOAT64_C(378917.44), 
SIMDE_FLOAT64_C(214241.91) }, + { SIMDE_FLOAT64_C(294684.17), SIMDE_FLOAT64_C(75983.67), SIMDE_FLOAT64_C(143540.59)}, + { { SIMDE_FLOAT64_C(-304331.98), SIMDE_FLOAT64_C(-304331.98) }, + { SIMDE_FLOAT64_C(378917.44), SIMDE_FLOAT64_C(378917.44) }, + { SIMDE_FLOAT64_C(214241.91), SIMDE_FLOAT64_C(214241.91) } } }, + { { SIMDE_FLOAT64_C(177828.72), SIMDE_FLOAT64_C(182291.88), SIMDE_FLOAT64_C(133239.91) }, + { SIMDE_FLOAT64_C(-367510.55), SIMDE_FLOAT64_C(11412.91), SIMDE_FLOAT64_C(-249368.85)}, + { { SIMDE_FLOAT64_C(177828.72), SIMDE_FLOAT64_C(177828.72) }, + { SIMDE_FLOAT64_C(182291.88), SIMDE_FLOAT64_C(182291.88) }, + { SIMDE_FLOAT64_C(133239.91), SIMDE_FLOAT64_C(133239.91) } } }, + { { SIMDE_FLOAT64_C(480108.45), SIMDE_FLOAT64_C(-112601.79), SIMDE_FLOAT64_C(-401218.95) }, + { SIMDE_FLOAT64_C(-132521.78), SIMDE_FLOAT64_C(-454060.72), SIMDE_FLOAT64_C(92070.34)}, + { { SIMDE_FLOAT64_C(480108.45), SIMDE_FLOAT64_C(480108.45) }, + { SIMDE_FLOAT64_C(-112601.79), SIMDE_FLOAT64_C(-112601.79) }, + { SIMDE_FLOAT64_C(-401218.95), SIMDE_FLOAT64_C(-401218.95) } } }, + { { SIMDE_FLOAT64_C(-209344.40), SIMDE_FLOAT64_C(-38397.36), SIMDE_FLOAT64_C(171975.62) }, + { SIMDE_FLOAT64_C(-306358.76), SIMDE_FLOAT64_C(-389466.37), SIMDE_FLOAT64_C(459048.36)}, + { { SIMDE_FLOAT64_C(-209344.40), SIMDE_FLOAT64_C(-209344.40) }, + { SIMDE_FLOAT64_C(-38397.36), SIMDE_FLOAT64_C(-38397.36) }, + { SIMDE_FLOAT64_C(171975.62), SIMDE_FLOAT64_C(171975.62) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2x3_t r = simde_vld3q_dup_f64(test_vec[i].a); + simde_float64x2x3_t expected = { + {simde_vld1q_f64(test_vec[i].r[0]), simde_vld1q_f64(test_vec[i].r[1]), simde_vld1q_f64(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + 
+static int +test_simde_vld3q_dup_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[3]; + int8_t unused[3]; + int8_t r[3][16]; + } test_vec[] = { + { { INT8_C(4), INT8_C(83), -INT8_C(85) }, + { -INT8_C(88), -INT8_C(66), INT8_C(59)}, + { { INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), + INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4) }, + { INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), + INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83), INT8_C(83) }, + { -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), + -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85) } } }, + { { -INT8_C(22), INT8_C(10), -INT8_C(86) }, + { -INT8_C(79), -INT8_C(34), -INT8_C(27)}, + { { -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), + -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22), -INT8_C(22) }, + { INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), + INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10) }, + { -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), + -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86), -INT8_C(86) } } }, + { { -INT8_C(6), -INT8_C(70), -INT8_C(75) }, + { INT8_C(64), -INT8_C(62), INT8_C(94)}, + { { -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), + -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6) }, + { -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), + -INT8_C(70), 
-INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70) }, + { -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), + -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75), -INT8_C(75) } } }, + { { -INT8_C(42), INT8_C(47), -INT8_C(43) }, + { INT8_C(90), INT8_C(56), INT8_C(31)}, + { { -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), + -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42) }, + { INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), + INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47), INT8_C(47) }, + { -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), + -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43) } } }, + { { INT8_C(88), -INT8_C(67), -INT8_C(36) }, + { INT8_C(56), -INT8_C(54), INT8_C(66)}, + { { INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), + INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88), INT8_C(88) }, + { -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), + -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67), -INT8_C(67) }, + { -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), + -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36) } } }, + { { -INT8_C(58), -INT8_C(5), INT8_C(64) }, + { -INT8_C(87), -INT8_C(97), -INT8_C(71)}, + { { -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), + -INT8_C(58), -INT8_C(58), 
-INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58), -INT8_C(58) }, + { -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), + -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5), -INT8_C(5) }, + { INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), + INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64) } } }, + { { -INT8_C(68), INT8_C(98), INT8_C(64) }, + { -INT8_C(65), INT8_C(25), INT8_C(97)}, + { { -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), + -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68), -INT8_C(68) }, + { INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), + INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98) }, + { INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), + INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64), INT8_C(64) } } }, + { { -INT8_C(27), -INT8_C(43), -INT8_C(10) }, + { INT8_C(0), INT8_C(27), -INT8_C(24)}, + { { -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), + -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27) }, + { -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), + -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43), -INT8_C(43) }, + { -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), + -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_int8x16x3_t r = simde_vld3q_dup_s8(test_vec[i].a); + simde_int8x16x3_t expected = { + {simde_vld1q_s8(test_vec[i].r[0]), simde_vld1q_s8(test_vec[i].r[1]), simde_vld1q_s8(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x16(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[3]; + int16_t unused[3]; + int16_t r[3][8]; + } test_vec[] = { + { { INT16_C(3070), INT16_C(3325), -INT16_C(8375) }, + { -INT16_C(8688), -INT16_C(7594), INT16_C(4565)}, + { { INT16_C(3070), INT16_C(3070), INT16_C(3070), INT16_C(3070), + INT16_C(3070), INT16_C(3070), INT16_C(3070), INT16_C(3070) }, + { INT16_C(3325), INT16_C(3325), INT16_C(3325), INT16_C(3325), + INT16_C(3325), INT16_C(3325), INT16_C(3325), INT16_C(3325) }, + { -INT16_C(8375), -INT16_C(8375), -INT16_C(8375), -INT16_C(8375), + -INT16_C(8375), -INT16_C(8375), -INT16_C(8375), -INT16_C(8375) } } }, + { { -INT16_C(4204), INT16_C(8721), INT16_C(5623) }, + { -INT16_C(472), INT16_C(2671), INT16_C(1200)}, + { { -INT16_C(4204), -INT16_C(4204), -INT16_C(4204), -INT16_C(4204), + -INT16_C(4204), -INT16_C(4204), -INT16_C(4204), -INT16_C(4204) }, + { INT16_C(8721), INT16_C(8721), INT16_C(8721), INT16_C(8721), + INT16_C(8721), INT16_C(8721), INT16_C(8721), INT16_C(8721) }, + { INT16_C(5623), INT16_C(5623), INT16_C(5623), INT16_C(5623), + INT16_C(5623), INT16_C(5623), INT16_C(5623), INT16_C(5623) } } }, + { { INT16_C(6944), INT16_C(2570), INT16_C(7777) }, + { -INT16_C(6788), -INT16_C(5739), INT16_C(634)}, + { { INT16_C(6944), INT16_C(6944), INT16_C(6944), INT16_C(6944), + INT16_C(6944), INT16_C(6944), INT16_C(6944), INT16_C(6944) }, + { INT16_C(2570), INT16_C(2570), INT16_C(2570), INT16_C(2570), + INT16_C(2570), INT16_C(2570), INT16_C(2570), INT16_C(2570) }, + { 
INT16_C(7777), INT16_C(7777), INT16_C(7777), INT16_C(7777), + INT16_C(7777), INT16_C(7777), INT16_C(7777), INT16_C(7777) } } }, + { { -INT16_C(2506), INT16_C(2490), -INT16_C(5108) }, + { -INT16_C(4656), -INT16_C(804), INT16_C(9299)}, + { { -INT16_C(2506), -INT16_C(2506), -INT16_C(2506), -INT16_C(2506), + -INT16_C(2506), -INT16_C(2506), -INT16_C(2506), -INT16_C(2506) }, + { INT16_C(2490), INT16_C(2490), INT16_C(2490), INT16_C(2490), + INT16_C(2490), INT16_C(2490), INT16_C(2490), INT16_C(2490) }, + { -INT16_C(5108), -INT16_C(5108), -INT16_C(5108), -INT16_C(5108), + -INT16_C(5108), -INT16_C(5108), -INT16_C(5108), -INT16_C(5108) } } }, + { { INT16_C(7062), -INT16_C(9613), INT16_C(5275) }, + { INT16_C(9211), -INT16_C(3971), INT16_C(2988)}, + { { INT16_C(7062), INT16_C(7062), INT16_C(7062), INT16_C(7062), + INT16_C(7062), INT16_C(7062), INT16_C(7062), INT16_C(7062) }, + { -INT16_C(9613), -INT16_C(9613), -INT16_C(9613), -INT16_C(9613), + -INT16_C(9613), -INT16_C(9613), -INT16_C(9613), -INT16_C(9613) }, + { INT16_C(5275), INT16_C(5275), INT16_C(5275), INT16_C(5275), + INT16_C(5275), INT16_C(5275), INT16_C(5275), INT16_C(5275) } } }, + { { -INT16_C(2278), INT16_C(2234), INT16_C(8224) }, + { INT16_C(377), INT16_C(8197), INT16_C(9563)}, + { { -INT16_C(2278), -INT16_C(2278), -INT16_C(2278), -INT16_C(2278), + -INT16_C(2278), -INT16_C(2278), -INT16_C(2278), -INT16_C(2278) }, + { INT16_C(2234), INT16_C(2234), INT16_C(2234), INT16_C(2234), + INT16_C(2234), INT16_C(2234), INT16_C(2234), INT16_C(2234) }, + { INT16_C(8224), INT16_C(8224), INT16_C(8224), INT16_C(8224), + INT16_C(8224), INT16_C(8224), INT16_C(8224), INT16_C(8224) } } }, + { { INT16_C(8937), -INT16_C(9972), INT16_C(4720) }, + { -INT16_C(5849), INT16_C(8449), INT16_C(7259)}, + { { INT16_C(8937), INT16_C(8937), INT16_C(8937), INT16_C(8937), + INT16_C(8937), INT16_C(8937), INT16_C(8937), INT16_C(8937) }, + { -INT16_C(9972), -INT16_C(9972), -INT16_C(9972), -INT16_C(9972), + -INT16_C(9972), -INT16_C(9972), -INT16_C(9972), 
-INT16_C(9972) }, + { INT16_C(4720), INT16_C(4720), INT16_C(4720), INT16_C(4720), + INT16_C(4720), INT16_C(4720), INT16_C(4720), INT16_C(4720) } } }, + { { -INT16_C(8860), -INT16_C(995), -INT16_C(3832) }, + { INT16_C(4136), -INT16_C(9880), -INT16_C(6950)}, + { { -INT16_C(8860), -INT16_C(8860), -INT16_C(8860), -INT16_C(8860), + -INT16_C(8860), -INT16_C(8860), -INT16_C(8860), -INT16_C(8860) }, + { -INT16_C(995), -INT16_C(995), -INT16_C(995), -INT16_C(995), + -INT16_C(995), -INT16_C(995), -INT16_C(995), -INT16_C(995) }, + { -INT16_C(3832), -INT16_C(3832), -INT16_C(3832), -INT16_C(3832), + -INT16_C(3832), -INT16_C(3832), -INT16_C(3832), -INT16_C(3832) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8x3_t r = simde_vld3q_dup_s16(test_vec[i].a); + simde_int16x8x3_t expected = { + {simde_vld1q_s16(test_vec[i].r[0]), simde_vld1q_s16(test_vec[i].r[1]), simde_vld1q_s16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i16x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[3]; + int32_t unused[3]; + int32_t r[3][4]; + } test_vec[] = { + { { -INT32_C(220041), -INT32_C(136466), -INT32_C(639215) }, + { -INT32_C(317815), -INT32_C(462436), -INT32_C(731661)}, + { { -INT32_C(220041), -INT32_C(220041), -INT32_C(220041), -INT32_C(220041) }, + { -INT32_C(136466), -INT32_C(136466), -INT32_C(136466), -INT32_C(136466) }, + { -INT32_C(639215), -INT32_C(639215), -INT32_C(639215), -INT32_C(639215) } } }, + { { INT32_C(126360), INT32_C(731272), -INT32_C(643018) }, + { INT32_C(135164), INT32_C(44493), INT32_C(256793)}, + { { INT32_C(126360), INT32_C(126360), INT32_C(126360), INT32_C(126360) }, + { INT32_C(731272), INT32_C(731272), INT32_C(731272), INT32_C(731272) }, + { -INT32_C(643018), 
-INT32_C(643018), -INT32_C(643018), -INT32_C(643018) } } }, + { { -INT32_C(506698), INT32_C(847224), -INT32_C(263807) }, + { -INT32_C(322179), INT32_C(943052), INT32_C(403393)}, + { { -INT32_C(506698), -INT32_C(506698), -INT32_C(506698), -INT32_C(506698) }, + { INT32_C(847224), INT32_C(847224), INT32_C(847224), INT32_C(847224) }, + { -INT32_C(263807), -INT32_C(263807), -INT32_C(263807), -INT32_C(263807) } } }, + { { -INT32_C(269498), INT32_C(592114), -INT32_C(342573) }, + { -INT32_C(120068), -INT32_C(608432), -INT32_C(729451)}, + { { -INT32_C(269498), -INT32_C(269498), -INT32_C(269498), -INT32_C(269498) }, + { INT32_C(592114), INT32_C(592114), INT32_C(592114), INT32_C(592114) }, + { -INT32_C(342573), -INT32_C(342573), -INT32_C(342573), -INT32_C(342573) } } }, + { { -INT32_C(930622), -INT32_C(46600), -INT32_C(350088) }, + { -INT32_C(464765), -INT32_C(884750), INT32_C(205861)}, + { { -INT32_C(930622), -INT32_C(930622), -INT32_C(930622), -INT32_C(930622) }, + { -INT32_C(46600), -INT32_C(46600), -INT32_C(46600), -INT32_C(46600) }, + { -INT32_C(350088), -INT32_C(350088), -INT32_C(350088), -INT32_C(350088) } } }, + { { -INT32_C(243432), -INT32_C(205395), -INT32_C(893190) }, + { INT32_C(811036), INT32_C(262547), INT32_C(351628)}, + { { -INT32_C(243432), -INT32_C(243432), -INT32_C(243432), -INT32_C(243432) }, + { -INT32_C(205395), -INT32_C(205395), -INT32_C(205395), -INT32_C(205395) }, + { -INT32_C(893190), -INT32_C(893190), -INT32_C(893190), -INT32_C(893190) } } }, + { { INT32_C(347685), INT32_C(358948), INT32_C(914938) }, + { INT32_C(27640), INT32_C(919511), INT32_C(219147)}, + { { INT32_C(347685), INT32_C(347685), INT32_C(347685), INT32_C(347685) }, + { INT32_C(358948), INT32_C(358948), INT32_C(358948), INT32_C(358948) }, + { INT32_C(914938), INT32_C(914938), INT32_C(914938), INT32_C(914938) } } }, + { { INT32_C(561560), -INT32_C(274267), INT32_C(204485) }, + { INT32_C(765969), INT32_C(388245), INT32_C(257065)}, + { { INT32_C(561560), INT32_C(561560), INT32_C(561560), 
INT32_C(561560) }, + { -INT32_C(274267), -INT32_C(274267), -INT32_C(274267), -INT32_C(274267) }, + { INT32_C(204485), INT32_C(204485), INT32_C(204485), INT32_C(204485) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4x3_t r = simde_vld3q_dup_s32(test_vec[i].a); + simde_int32x4x3_t expected = { + {simde_vld1q_s32(test_vec[i].r[0]), simde_vld1q_s32(test_vec[i].r[1]), simde_vld1q_s32(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i32x4(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[3]; + int64_t unused[3]; + int64_t r[3][2]; + } test_vec[] = { + { { -INT64_C(79406386), INT64_C(41175464), INT64_C(7159478) }, + { INT64_C(30451828), INT64_C(73611566), INT64_C(82111454)}, + { { -INT64_C(79406386), -INT64_C(79406386) }, + { INT64_C(41175464), INT64_C(41175464) }, + { INT64_C(7159478), INT64_C(7159478) } } }, + { { -INT64_C(75960984), INT64_C(72605055), INT64_C(28446824) }, + { INT64_C(33111103), -INT64_C(83285820), INT64_C(74966460)}, + { { -INT64_C(75960984), -INT64_C(75960984) }, + { INT64_C(72605055), INT64_C(72605055) }, + { INT64_C(28446824), INT64_C(28446824) } } }, + { { INT64_C(12246277), INT64_C(13799747), INT64_C(5941147) }, + { -INT64_C(75658711), INT64_C(2033325), -INT64_C(46607368)}, + { { INT64_C(12246277), INT64_C(12246277) }, + { INT64_C(13799747), INT64_C(13799747) }, + { INT64_C(5941147), INT64_C(5941147) } } }, + { { -INT64_C(34340194), -INT64_C(85099997), INT64_C(58086303) }, + { -INT64_C(70737572), -INT64_C(35354753), -INT64_C(67958448)}, + { { -INT64_C(34340194), -INT64_C(34340194) }, + { -INT64_C(85099997), -INT64_C(85099997) }, + { INT64_C(58086303), INT64_C(58086303) } } }, + { { -INT64_C(55249984), -INT64_C(44930870), INT64_C(34202412) }, + { 
INT64_C(19535608), INT64_C(32670990), INT64_C(11914506)}, + { { -INT64_C(55249984), -INT64_C(55249984) }, + { -INT64_C(44930870), -INT64_C(44930870) }, + { INT64_C(34202412), INT64_C(34202412) } } }, + { { -INT64_C(70171393), -INT64_C(52064191), INT64_C(19678826) }, + { INT64_C(65818655), INT64_C(42062976), INT64_C(71093868)}, + { { -INT64_C(70171393), -INT64_C(70171393) }, + { -INT64_C(52064191), -INT64_C(52064191) }, + { INT64_C(19678826), INT64_C(19678826) } } }, + { { INT64_C(61111924), INT64_C(5653009), INT64_C(11261082) }, + { -INT64_C(29267150), INT64_C(49729783), INT64_C(59210528)}, + { { INT64_C(61111924), INT64_C(61111924) }, + { INT64_C(5653009), INT64_C(5653009) }, + { INT64_C(11261082), INT64_C(11261082) } } }, + { { -INT64_C(40289471), INT64_C(71520226), -INT64_C(74282823) }, + { INT64_C(65251834), INT64_C(91366098), INT64_C(57294064)}, + { { -INT64_C(40289471), -INT64_C(40289471) }, + { INT64_C(71520226), INT64_C(71520226) }, + { -INT64_C(74282823), -INT64_C(74282823) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2x3_t r = simde_vld3q_dup_s64(test_vec[i].a); + simde_int64x2x3_t expected = { + {simde_vld1q_s64(test_vec[i].r[0]), simde_vld1q_s64(test_vec[i].r[1]), simde_vld1q_s64(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i64x2(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[3]; + uint8_t unused[3]; + uint8_t r[3][16]; + } test_vec[] = { + { { UINT8_C(131), UINT8_C(170), UINT8_C(140) }, + { UINT8_C(114), UINT8_C(199), UINT8_C(32)}, + { { UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), + UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), 
UINT8_C(131) }, + { UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), + UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170), UINT8_C(170) }, + { UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), + UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140) } } }, + { { UINT8_C(122), UINT8_C(34), UINT8_C(152) }, + { UINT8_C(111), UINT8_C(186), UINT8_C(172)}, + { { UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), + UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122), UINT8_C(122) }, + { UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), + UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34) }, + { UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), + UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152), UINT8_C(152) } } }, + { { UINT8_C(198), UINT8_C(18), UINT8_C(101) }, + { UINT8_C(91), UINT8_C(119), UINT8_C(69)}, + { { UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), + UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198), UINT8_C(198) }, + { UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), + UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18), UINT8_C(18) }, + { UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), + UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), UINT8_C(101), 
UINT8_C(101), UINT8_C(101), UINT8_C(101) } } }, + { { UINT8_C(148), UINT8_C(113), UINT8_C(110) }, + { UINT8_C(57), UINT8_C(103), UINT8_C(149)}, + { { UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), + UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148), UINT8_C(148) }, + { UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), + UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113) }, + { UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), + UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110) } } }, + { { UINT8_C(13), UINT8_C(140), UINT8_C(72) }, + { UINT8_C(114), UINT8_C(161), UINT8_C(131)}, + { { UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), + UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13), UINT8_C(13) }, + { UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), + UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140), UINT8_C(140) }, + { UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), + UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72) } } }, + { { UINT8_C(66), UINT8_C(23), UINT8_C(149) }, + { UINT8_C(20), UINT8_C(31), UINT8_C(114)}, + { { UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), + UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66), UINT8_C(66) }, + { UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), 
UINT8_C(23), UINT8_C(23), UINT8_C(23), + UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23), UINT8_C(23) }, + { UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), + UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149) } } }, + { { UINT8_C(65), UINT8_C(19), UINT8_C(124) }, + { UINT8_C(31), UINT8_C(9), UINT8_C(194)}, + { { UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), + UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65) }, + { UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), + UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19), UINT8_C(19) }, + { UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), + UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124) } } }, + { { UINT8_C(72), UINT8_C(144), UINT8_C(159) }, + { UINT8_C(87), UINT8_C(134), UINT8_C(28)}, + { { UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), + UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72), UINT8_C(72) }, + { UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), + UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144), UINT8_C(144) }, + { UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), + UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159), UINT8_C(159) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) 
; i++) { + simde_uint8x16x3_t r = simde_vld3q_dup_u8(test_vec[i].a); + simde_uint8x16x3_t expected = { + {simde_vld1q_u8(test_vec[i].r[0]), simde_vld1q_u8(test_vec[i].r[1]), simde_vld1q_u8(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u8x16(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[3]; + uint16_t unused[3]; + uint16_t r[3][8]; + } test_vec[] = { + { { UINT16_C(13221), UINT16_C(12229), UINT16_C(11071) }, + { UINT16_C(17602), UINT16_C(4227), UINT16_C(7666)}, + { { UINT16_C(13221), UINT16_C(13221), UINT16_C(13221), UINT16_C(13221), + UINT16_C(13221), UINT16_C(13221), UINT16_C(13221), UINT16_C(13221) }, + { UINT16_C(12229), UINT16_C(12229), UINT16_C(12229), UINT16_C(12229), + UINT16_C(12229), UINT16_C(12229), UINT16_C(12229), UINT16_C(12229) }, + { UINT16_C(11071), UINT16_C(11071), UINT16_C(11071), UINT16_C(11071), + UINT16_C(11071), UINT16_C(11071), UINT16_C(11071), UINT16_C(11071) } } }, + { { UINT16_C(12420), UINT16_C(14500), UINT16_C(12248) }, + { UINT16_C(17926), UINT16_C(17458), UINT16_C(3685)}, + { { UINT16_C(12420), UINT16_C(12420), UINT16_C(12420), UINT16_C(12420), + UINT16_C(12420), UINT16_C(12420), UINT16_C(12420), UINT16_C(12420) }, + { UINT16_C(14500), UINT16_C(14500), UINT16_C(14500), UINT16_C(14500), + UINT16_C(14500), UINT16_C(14500), UINT16_C(14500), UINT16_C(14500) }, + { UINT16_C(12248), UINT16_C(12248), UINT16_C(12248), UINT16_C(12248), + UINT16_C(12248), UINT16_C(12248), UINT16_C(12248), UINT16_C(12248) } } }, + { { UINT16_C(8504), UINT16_C(3056), UINT16_C(15978) }, + { UINT16_C(11648), UINT16_C(2353), UINT16_C(16355)}, + { { UINT16_C(8504), UINT16_C(8504), UINT16_C(8504), UINT16_C(8504), + UINT16_C(8504), UINT16_C(8504), UINT16_C(8504), UINT16_C(8504) }, + { UINT16_C(3056), UINT16_C(3056), 
UINT16_C(3056), UINT16_C(3056), + UINT16_C(3056), UINT16_C(3056), UINT16_C(3056), UINT16_C(3056) }, + { UINT16_C(15978), UINT16_C(15978), UINT16_C(15978), UINT16_C(15978), + UINT16_C(15978), UINT16_C(15978), UINT16_C(15978), UINT16_C(15978) } } }, + { { UINT16_C(15523), UINT16_C(1359), UINT16_C(16502) }, + { UINT16_C(13928), UINT16_C(5794), UINT16_C(818)}, + { { UINT16_C(15523), UINT16_C(15523), UINT16_C(15523), UINT16_C(15523), + UINT16_C(15523), UINT16_C(15523), UINT16_C(15523), UINT16_C(15523) }, + { UINT16_C(1359), UINT16_C(1359), UINT16_C(1359), UINT16_C(1359), + UINT16_C(1359), UINT16_C(1359), UINT16_C(1359), UINT16_C(1359) }, + { UINT16_C(16502), UINT16_C(16502), UINT16_C(16502), UINT16_C(16502), + UINT16_C(16502), UINT16_C(16502), UINT16_C(16502), UINT16_C(16502) } } }, + { { UINT16_C(19929), UINT16_C(2856), UINT16_C(3524) }, + { UINT16_C(4725), UINT16_C(16666), UINT16_C(9010)}, + { { UINT16_C(19929), UINT16_C(19929), UINT16_C(19929), UINT16_C(19929), + UINT16_C(19929), UINT16_C(19929), UINT16_C(19929), UINT16_C(19929) }, + { UINT16_C(2856), UINT16_C(2856), UINT16_C(2856), UINT16_C(2856), + UINT16_C(2856), UINT16_C(2856), UINT16_C(2856), UINT16_C(2856) }, + { UINT16_C(3524), UINT16_C(3524), UINT16_C(3524), UINT16_C(3524), + UINT16_C(3524), UINT16_C(3524), UINT16_C(3524), UINT16_C(3524) } } }, + { { UINT16_C(1112), UINT16_C(8015), UINT16_C(4791) }, + { UINT16_C(18035), UINT16_C(7170), UINT16_C(3494)}, + { { UINT16_C(1112), UINT16_C(1112), UINT16_C(1112), UINT16_C(1112), + UINT16_C(1112), UINT16_C(1112), UINT16_C(1112), UINT16_C(1112) }, + { UINT16_C(8015), UINT16_C(8015), UINT16_C(8015), UINT16_C(8015), + UINT16_C(8015), UINT16_C(8015), UINT16_C(8015), UINT16_C(8015) }, + { UINT16_C(4791), UINT16_C(4791), UINT16_C(4791), UINT16_C(4791), + UINT16_C(4791), UINT16_C(4791), UINT16_C(4791), UINT16_C(4791) } } }, + { { UINT16_C(19750), UINT16_C(19409), UINT16_C(17275) }, + { UINT16_C(5089), UINT16_C(5163), UINT16_C(18594)}, + { { UINT16_C(19750), UINT16_C(19750), 
UINT16_C(19750), UINT16_C(19750), + UINT16_C(19750), UINT16_C(19750), UINT16_C(19750), UINT16_C(19750) }, + { UINT16_C(19409), UINT16_C(19409), UINT16_C(19409), UINT16_C(19409), + UINT16_C(19409), UINT16_C(19409), UINT16_C(19409), UINT16_C(19409) }, + { UINT16_C(17275), UINT16_C(17275), UINT16_C(17275), UINT16_C(17275), + UINT16_C(17275), UINT16_C(17275), UINT16_C(17275), UINT16_C(17275) } } }, + { { UINT16_C(7578), UINT16_C(8951), UINT16_C(11969) }, + { UINT16_C(15208), UINT16_C(9477), UINT16_C(18579)}, + { { UINT16_C(7578), UINT16_C(7578), UINT16_C(7578), UINT16_C(7578), + UINT16_C(7578), UINT16_C(7578), UINT16_C(7578), UINT16_C(7578) }, + { UINT16_C(8951), UINT16_C(8951), UINT16_C(8951), UINT16_C(8951), + UINT16_C(8951), UINT16_C(8951), UINT16_C(8951), UINT16_C(8951) }, + { UINT16_C(11969), UINT16_C(11969), UINT16_C(11969), UINT16_C(11969), + UINT16_C(11969), UINT16_C(11969), UINT16_C(11969), UINT16_C(11969) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8x3_t r = simde_vld3q_dup_u16(test_vec[i].a); + simde_uint16x8x3_t expected = { + {simde_vld1q_u16(test_vec[i].r[0]), simde_vld1q_u16(test_vec[i].r[1]), simde_vld1q_u16(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u16x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[3]; + uint32_t unused[3]; + uint32_t r[3][4]; + } test_vec[] = { + { { UINT32_C(1876681), UINT32_C(383882), UINT32_C(1503446) }, + { UINT32_C(1131353), UINT32_C(685373), UINT32_C(644898)}, + { { UINT32_C(1876681), UINT32_C(1876681), UINT32_C(1876681), UINT32_C(1876681) }, + { UINT32_C(383882), UINT32_C(383882), UINT32_C(383882), UINT32_C(383882) }, + { UINT32_C(1503446), UINT32_C(1503446), UINT32_C(1503446), UINT32_C(1503446) } } }, + { { 
UINT32_C(1252582), UINT32_C(197720), UINT32_C(1532363) }, + { UINT32_C(149827), UINT32_C(586569), UINT32_C(781195)}, + { { UINT32_C(1252582), UINT32_C(1252582), UINT32_C(1252582), UINT32_C(1252582) }, + { UINT32_C(197720), UINT32_C(197720), UINT32_C(197720), UINT32_C(197720) }, + { UINT32_C(1532363), UINT32_C(1532363), UINT32_C(1532363), UINT32_C(1532363) } } }, + { { UINT32_C(1255475), UINT32_C(1250931), UINT32_C(344434) }, + { UINT32_C(20022), UINT32_C(598221), UINT32_C(1823423)}, + { { UINT32_C(1255475), UINT32_C(1255475), UINT32_C(1255475), UINT32_C(1255475) }, + { UINT32_C(1250931), UINT32_C(1250931), UINT32_C(1250931), UINT32_C(1250931) }, + { UINT32_C(344434), UINT32_C(344434), UINT32_C(344434), UINT32_C(344434) } } }, + { { UINT32_C(468848), UINT32_C(1580004), UINT32_C(349385) }, + { UINT32_C(398943), UINT32_C(1301831), UINT32_C(129635)}, + { { UINT32_C(468848), UINT32_C(468848), UINT32_C(468848), UINT32_C(468848) }, + { UINT32_C(1580004), UINT32_C(1580004), UINT32_C(1580004), UINT32_C(1580004) }, + { UINT32_C(349385), UINT32_C(349385), UINT32_C(349385), UINT32_C(349385) } } }, + { { UINT32_C(1151250), UINT32_C(233582), UINT32_C(1925943) }, + { UINT32_C(77066), UINT32_C(1269609), UINT32_C(294824)}, + { { UINT32_C(1151250), UINT32_C(1151250), UINT32_C(1151250), UINT32_C(1151250) }, + { UINT32_C(233582), UINT32_C(233582), UINT32_C(233582), UINT32_C(233582) }, + { UINT32_C(1925943), UINT32_C(1925943), UINT32_C(1925943), UINT32_C(1925943) } } }, + { { UINT32_C(498645), UINT32_C(274619), UINT32_C(816883) }, + { UINT32_C(647287), UINT32_C(31939), UINT32_C(1314337)}, + { { UINT32_C(498645), UINT32_C(498645), UINT32_C(498645), UINT32_C(498645) }, + { UINT32_C(274619), UINT32_C(274619), UINT32_C(274619), UINT32_C(274619) }, + { UINT32_C(816883), UINT32_C(816883), UINT32_C(816883), UINT32_C(816883) } } }, + { { UINT32_C(976290), UINT32_C(913370), UINT32_C(617200) }, + { UINT32_C(772841), UINT32_C(145369), UINT32_C(351685)}, + { { UINT32_C(976290), UINT32_C(976290), 
UINT32_C(976290), UINT32_C(976290) }, + { UINT32_C(913370), UINT32_C(913370), UINT32_C(913370), UINT32_C(913370) }, + { UINT32_C(617200), UINT32_C(617200), UINT32_C(617200), UINT32_C(617200) } } }, + { { UINT32_C(350154), UINT32_C(575781), UINT32_C(1368405) }, + { UINT32_C(172538), UINT32_C(1978412), UINT32_C(1670805)}, + { { UINT32_C(350154), UINT32_C(350154), UINT32_C(350154), UINT32_C(350154) }, + { UINT32_C(575781), UINT32_C(575781), UINT32_C(575781), UINT32_C(575781) }, + { UINT32_C(1368405), UINT32_C(1368405), UINT32_C(1368405), UINT32_C(1368405) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4x3_t r = simde_vld3q_dup_u32(test_vec[i].a); + simde_uint32x4x3_t expected = { + {simde_vld1q_u32(test_vec[i].r[0]), simde_vld1q_u32(test_vec[i].r[1]), simde_vld1q_u32(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u32x4(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[3]; + uint64_t unused[3]; + uint64_t r[3][2]; + } test_vec[] = { + { { UINT64_C(155091681), UINT64_C(112589773), UINT64_C(63143791) }, + { UINT64_C(23716520), UINT64_C(176818970), UINT64_C(116173662)}, + { { UINT64_C(155091681), UINT64_C(155091681) }, + { UINT64_C(112589773), UINT64_C(112589773) }, + { UINT64_C(63143791), UINT64_C(63143791) } } }, + { { UINT64_C(130942524), UINT64_C(57780860), UINT64_C(31934739) }, + { UINT64_C(176079421), UINT64_C(5675018), UINT64_C(63494312)}, + { { UINT64_C(130942524), UINT64_C(130942524) }, + { UINT64_C(57780860), UINT64_C(57780860) }, + { UINT64_C(31934739), UINT64_C(31934739) } } }, + { { UINT64_C(194457642), UINT64_C(39554875), UINT64_C(15224283) }, + { UINT64_C(71051928), UINT64_C(81814738), UINT64_C(2283266)}, + { { UINT64_C(194457642), UINT64_C(194457642) }, 
+ { UINT64_C(39554875), UINT64_C(39554875) }, + { UINT64_C(15224283), UINT64_C(15224283) } } }, + { { UINT64_C(23817396), UINT64_C(113541293), UINT64_C(123440835) }, + { UINT64_C(102375805), UINT64_C(133049515), UINT64_C(41509326)}, + { { UINT64_C(23817396), UINT64_C(23817396) }, + { UINT64_C(113541293), UINT64_C(113541293) }, + { UINT64_C(123440835), UINT64_C(123440835) } } }, + { { UINT64_C(2000000), UINT64_C(51642274), UINT64_C(110635130) }, + { UINT64_C(133946772), UINT64_C(153402717), UINT64_C(120380529)}, + { { UINT64_C(2000000), UINT64_C(2000000) }, + { UINT64_C(51642274), UINT64_C(51642274) }, + { UINT64_C(110635130), UINT64_C(110635130) } } }, + { { UINT64_C(174832616), UINT64_C(187609954), UINT64_C(22590510) }, + { UINT64_C(93628333), UINT64_C(86283110), UINT64_C(199160701)}, + { { UINT64_C(174832616), UINT64_C(174832616) }, + { UINT64_C(187609954), UINT64_C(187609954) }, + { UINT64_C(22590510), UINT64_C(22590510) } } }, + { { UINT64_C(137018276), UINT64_C(195056582), UINT64_C(193644384) }, + { UINT64_C(108521062), UINT64_C(28373149), UINT64_C(12285757)}, + { { UINT64_C(137018276), UINT64_C(137018276) }, + { UINT64_C(195056582), UINT64_C(195056582) }, + { UINT64_C(193644384), UINT64_C(193644384) } } }, + { { UINT64_C(56333011), UINT64_C(89077389), UINT64_C(76364445) }, + { UINT64_C(662711), UINT64_C(94837583), UINT64_C(180422589)}, + { { UINT64_C(56333011), UINT64_C(56333011) }, + { UINT64_C(89077389), UINT64_C(89077389) }, + { UINT64_C(76364445), UINT64_C(76364445) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2x3_t r = simde_vld3q_dup_u64(test_vec[i].a); + simde_uint64x2x3_t expected = { + {simde_vld1q_u64(test_vec[i].r[0]), simde_vld1q_u64(test_vec[i].r[1]), simde_vld1q_u64(test_vec[i].r[2])}}; + + simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u64x2(r.val[2], 
expected.val[2]); + } + + return 0; +} + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_TEST_FUNC_LIST_BEGIN +#if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_dup_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_dup_u64) +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld3_lane.c b/test/arm/neon/ld3_lane.c new file mode 100644 index 000000000..0fe1c7df2 --- /dev/null +++ b/test/arm/neon/ld3_lane.c @@ -0,0 +1,1777 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld3_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld3_lane.h" +#include "../../../simde/arm/neon/ld1.h" +#include "../../../simde/arm/neon/ld3.h" + +static int +test_simde_vld3_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t src[3][8]; + int8_t buf[3]; + int8_t r[3][8]; + } test_vec[] = { + { { { -INT8_C(17), -INT8_C(27), -INT8_C(24), -INT8_C(29), + INT8_C(45), -INT8_C(6), INT8_C(34), INT8_C(10) }, + { INT8_C(15), -INT8_C(6), -INT8_C(6), -INT8_C(34), + INT8_C(47), -INT8_C(6), -INT8_C(24), INT8_C(49) }, + { INT8_C(32), -INT8_C(18), -INT8_C(20), 
-INT8_C(33), + INT8_C(12), -INT8_C(21), INT8_C(2), INT8_C(28) } }, + { -INT8_C(9), -INT8_C(28), -INT8_C(19)}, + { { -INT8_C(9), -INT8_C(27), -INT8_C(24), -INT8_C(29), + INT8_C(45), -INT8_C(6), INT8_C(34), INT8_C(10) }, + { -INT8_C(28), -INT8_C(6), -INT8_C(6), -INT8_C(34), + INT8_C(47), -INT8_C(6), -INT8_C(24), INT8_C(49) }, + { -INT8_C(19), -INT8_C(18), -INT8_C(20), -INT8_C(33), + INT8_C(12), -INT8_C(21), INT8_C(2), INT8_C(28) } } }, + { { { INT8_C(33), -INT8_C(24), INT8_C(12), -INT8_C(5), + -INT8_C(4), INT8_C(9), INT8_C(28), -INT8_C(40) }, + { -INT8_C(25), -INT8_C(46), -INT8_C(36), -INT8_C(31), + -INT8_C(43), INT8_C(0), -INT8_C(20), -INT8_C(12) }, + { INT8_C(49), -INT8_C(13), -INT8_C(21), -INT8_C(19), + -INT8_C(42), INT8_C(4), INT8_C(13), INT8_C(29) } }, + { INT8_C(46), INT8_C(40), -INT8_C(35)}, + { { INT8_C(33), INT8_C(46), INT8_C(12), -INT8_C(5), + -INT8_C(4), INT8_C(9), INT8_C(28), -INT8_C(40) }, + { -INT8_C(25), INT8_C(40), -INT8_C(36), -INT8_C(31), + -INT8_C(43), INT8_C(0), -INT8_C(20), -INT8_C(12) }, + { INT8_C(49), -INT8_C(35), -INT8_C(21), -INT8_C(19), + -INT8_C(42), INT8_C(4), INT8_C(13), INT8_C(29) } } }, + { { { -INT8_C(27), INT8_C(37), -INT8_C(43), -INT8_C(32), + -INT8_C(3), INT8_C(33), -INT8_C(33), -INT8_C(40) }, + { INT8_C(34), INT8_C(6), -INT8_C(20), -INT8_C(40), + -INT8_C(34), -INT8_C(45), -INT8_C(44), INT8_C(38) }, + { INT8_C(3), INT8_C(26), INT8_C(30), INT8_C(6), + -INT8_C(3), -INT8_C(2), -INT8_C(40), -INT8_C(5) } }, + { INT8_C(16), INT8_C(31), INT8_C(26)}, + { { -INT8_C(27), INT8_C(37), INT8_C(16), -INT8_C(32), + -INT8_C(3), INT8_C(33), -INT8_C(33), -INT8_C(40) }, + { INT8_C(34), INT8_C(6), INT8_C(31), -INT8_C(40), + -INT8_C(34), -INT8_C(45), -INT8_C(44), INT8_C(38) }, + { INT8_C(3), INT8_C(26), INT8_C(26), INT8_C(6), + -INT8_C(3), -INT8_C(2), -INT8_C(40), -INT8_C(5) } } }, + { { { INT8_C(10), INT8_C(35), -INT8_C(11), -INT8_C(33), + -INT8_C(4), -INT8_C(40), -INT8_C(41), INT8_C(1) }, + { -INT8_C(21), -INT8_C(47), -INT8_C(4), -INT8_C(34), + 
INT8_C(19), -INT8_C(3), -INT8_C(44), INT8_C(33) }, + { -INT8_C(7), INT8_C(0), INT8_C(1), INT8_C(17), + -INT8_C(20), -INT8_C(4), -INT8_C(6), INT8_C(0) } }, + { INT8_C(34), -INT8_C(2), -INT8_C(40)}, + { { INT8_C(10), INT8_C(35), -INT8_C(11), INT8_C(34), + -INT8_C(4), -INT8_C(40), -INT8_C(41), INT8_C(1) }, + { -INT8_C(21), -INT8_C(47), -INT8_C(4), -INT8_C(2), + INT8_C(19), -INT8_C(3), -INT8_C(44), INT8_C(33) }, + { -INT8_C(7), INT8_C(0), INT8_C(1), -INT8_C(40), + -INT8_C(20), -INT8_C(4), -INT8_C(6), INT8_C(0) } } }, + { { { -INT8_C(12), INT8_C(32), -INT8_C(3), INT8_C(11), + INT8_C(0), INT8_C(1), INT8_C(1), -INT8_C(2) }, + { INT8_C(6), -INT8_C(22), -INT8_C(37), -INT8_C(9), + INT8_C(0), INT8_C(43), INT8_C(37), -INT8_C(31) }, + { -INT8_C(33), -INT8_C(18), INT8_C(20), -INT8_C(34), + INT8_C(49), -INT8_C(23), INT8_C(10), -INT8_C(44) } }, + { -INT8_C(38), -INT8_C(42), -INT8_C(3)}, + { { -INT8_C(12), INT8_C(32), -INT8_C(3), INT8_C(11), + -INT8_C(38), INT8_C(1), INT8_C(1), -INT8_C(2) }, + { INT8_C(6), -INT8_C(22), -INT8_C(37), -INT8_C(9), + -INT8_C(42), INT8_C(43), INT8_C(37), -INT8_C(31) }, + { -INT8_C(33), -INT8_C(18), INT8_C(20), -INT8_C(34), + -INT8_C(3), -INT8_C(23), INT8_C(10), -INT8_C(44) } } }, + { { { -INT8_C(33), -INT8_C(1), -INT8_C(12), -INT8_C(11), + INT8_C(22), INT8_C(9), -INT8_C(46), -INT8_C(47) }, + { INT8_C(11), INT8_C(4), INT8_C(6), INT8_C(27), + INT8_C(47), -INT8_C(17), -INT8_C(44), -INT8_C(20) }, + { INT8_C(42), INT8_C(16), -INT8_C(25), -INT8_C(48), + -INT8_C(20), -INT8_C(32), INT8_C(29), INT8_C(28) } }, + { -INT8_C(20), INT8_C(0), -INT8_C(3)}, + { { -INT8_C(33), -INT8_C(1), -INT8_C(12), -INT8_C(11), + INT8_C(22), -INT8_C(20), -INT8_C(46), -INT8_C(47) }, + { INT8_C(11), INT8_C(4), INT8_C(6), INT8_C(27), + INT8_C(47), INT8_C(0), -INT8_C(44), -INT8_C(20) }, + { INT8_C(42), INT8_C(16), -INT8_C(25), -INT8_C(48), + -INT8_C(20), -INT8_C(3), INT8_C(29), INT8_C(28) } } }, + { { { INT8_C(11), -INT8_C(5), INT8_C(3), -INT8_C(28), + INT8_C(37), INT8_C(25), INT8_C(16), 
INT8_C(13) }, + { INT8_C(24), -INT8_C(39), -INT8_C(4), -INT8_C(13), + INT8_C(49), -INT8_C(3), INT8_C(25), INT8_C(9) }, + { -INT8_C(48), INT8_C(8), INT8_C(12), -INT8_C(4), + INT8_C(0), -INT8_C(7), -INT8_C(14), INT8_C(20) } }, + { -INT8_C(25), INT8_C(1), INT8_C(11)}, + { { INT8_C(11), -INT8_C(5), INT8_C(3), -INT8_C(28), + INT8_C(37), INT8_C(25), -INT8_C(25), INT8_C(13) }, + { INT8_C(24), -INT8_C(39), -INT8_C(4), -INT8_C(13), + INT8_C(49), -INT8_C(3), INT8_C(1), INT8_C(9) }, + { -INT8_C(48), INT8_C(8), INT8_C(12), -INT8_C(4), + INT8_C(0), -INT8_C(7), INT8_C(11), INT8_C(20) } } }, + { { { INT8_C(22), -INT8_C(26), INT8_C(11), -INT8_C(25), + INT8_C(7), INT8_C(29), INT8_C(35), -INT8_C(33) }, + { -INT8_C(21), INT8_C(33), INT8_C(0), -INT8_C(39), + -INT8_C(43), INT8_C(28), INT8_C(31), -INT8_C(10) }, + { -INT8_C(17), INT8_C(14), -INT8_C(41), INT8_C(30), + INT8_C(37), INT8_C(32), -INT8_C(7), -INT8_C(4) } }, + { -INT8_C(30), -INT8_C(48), INT8_C(17)}, + { { INT8_C(22), -INT8_C(26), INT8_C(11), -INT8_C(25), + INT8_C(7), INT8_C(29), INT8_C(35), -INT8_C(30) }, + { -INT8_C(21), INT8_C(33), INT8_C(0), -INT8_C(39), + -INT8_C(43), INT8_C(28), INT8_C(31), -INT8_C(48) }, + { -INT8_C(17), INT8_C(14), -INT8_C(41), INT8_C(30), + INT8_C(37), INT8_C(32), -INT8_C(7), INT8_C(17) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8x3_t r, src, expected; + src.val[0] = simde_vld1_s8(test_vec[i].src[0]); + src.val[1] = simde_vld1_s8(test_vec[i].src[1]); + src.val[2] = simde_vld1_s8(test_vec[i].src[2]); + + SIMDE_CONSTIFY_8_(simde_vld3_lane_s8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_s8(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s8(test_vec[i].r[1]); + expected.val[2] = simde_vld1_s8(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x8(r.val[2], 
expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t src[3][4]; + int16_t buf[3]; + int16_t r[3][4]; + } test_vec[] = { + { { { INT16_C(2717), INT16_C(575), INT16_C(4902), INT16_C(2261) }, + { INT16_C(1233), INT16_C(4720), -INT16_C(4726), INT16_C(4438) }, + { -INT16_C(4256), -INT16_C(1119), INT16_C(3307), -INT16_C(1818) } }, + { INT16_C(986), -INT16_C(4050), -INT16_C(1206)}, + { { INT16_C(986), INT16_C(575), INT16_C(4902), INT16_C(2261) }, + { -INT16_C(4050), INT16_C(4720), -INT16_C(4726), INT16_C(4438) }, + { -INT16_C(1206), -INT16_C(1119), INT16_C(3307), -INT16_C(1818) } } }, + { { { -INT16_C(82), INT16_C(3569), -INT16_C(3685), -INT16_C(2988) }, + { INT16_C(2597), INT16_C(868), INT16_C(1723), INT16_C(2628) }, + { INT16_C(905), -INT16_C(2883), -INT16_C(1731), -INT16_C(2362) } }, + { INT16_C(4882), INT16_C(4746), -INT16_C(2975)}, + { { -INT16_C(82), INT16_C(4882), -INT16_C(3685), -INT16_C(2988) }, + { INT16_C(2597), INT16_C(4746), INT16_C(1723), INT16_C(2628) }, + { INT16_C(905), -INT16_C(2975), -INT16_C(1731), -INT16_C(2362) } } }, + { { { -INT16_C(1293), -INT16_C(551), -INT16_C(1223), INT16_C(4017) }, + { INT16_C(1594), INT16_C(1517), INT16_C(2534), INT16_C(2808) }, + { INT16_C(415), INT16_C(2904), INT16_C(1958), INT16_C(961) } }, + { -INT16_C(3918), INT16_C(1301), INT16_C(150)}, + { { -INT16_C(1293), -INT16_C(551), -INT16_C(3918), INT16_C(4017) }, + { INT16_C(1594), INT16_C(1517), INT16_C(1301), INT16_C(2808) }, + { INT16_C(415), INT16_C(2904), INT16_C(150), INT16_C(961) } } }, + { { { -INT16_C(2100), INT16_C(2262), -INT16_C(3223), -INT16_C(3872) }, + { -INT16_C(2926), -INT16_C(109), -INT16_C(4620), INT16_C(4053) }, + { INT16_C(3674), INT16_C(2439), INT16_C(4543), -INT16_C(996) } }, + { INT16_C(4138), INT16_C(3789), -INT16_C(566)}, + { { -INT16_C(2100), INT16_C(2262), -INT16_C(3223), INT16_C(4138) }, + { -INT16_C(2926), -INT16_C(109), -INT16_C(4620), INT16_C(3789) }, + { 
INT16_C(3674), INT16_C(2439), INT16_C(4543), -INT16_C(566) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4x3_t r, src, expected; + src.val[0] = simde_vld1_s16(test_vec[i].src[0]); + src.val[1] = simde_vld1_s16(test_vec[i].src[1]); + src.val[2] = simde_vld1_s16(test_vec[i].src[2]); + + SIMDE_CONSTIFY_4_(simde_vld3_lane_s16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_s16(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s16(test_vec[i].r[1]); + expected.val[2] = simde_vld1_s16(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i16x4(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t src[3][2]; + int32_t buf[3]; + int32_t r[3][2]; + } test_vec[] = { + { { { -INT32_C(386983), -INT32_C(217954) }, + { INT32_C(344519), -INT32_C(251850) }, + { -INT32_C(126242), INT32_C(117859) } }, + { INT32_C(83246), INT32_C(273121), INT32_C(59814)}, + { { INT32_C(83246), -INT32_C(217954) }, + { INT32_C(273121), -INT32_C(251850) }, + { INT32_C(59814), INT32_C(117859) } } }, + { { { -INT32_C(177200), INT32_C(336643) }, + { INT32_C(240336), -INT32_C(324844) }, + { -INT32_C(254120), -INT32_C(345522) } }, + { INT32_C(488920), -INT32_C(470183), -INT32_C(90512)}, + { { -INT32_C(177200), INT32_C(488920) }, + { INT32_C(240336), -INT32_C(470183) }, + { -INT32_C(254120), -INT32_C(90512) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2x3_t r, src, expected; + src.val[0] = simde_vld1_s32(test_vec[i].src[0]); + src.val[1] = simde_vld1_s32(test_vec[i].src[1]); + src.val[2] = simde_vld1_s32(test_vec[i].src[2]); + + SIMDE_CONSTIFY_2_(simde_vld3_lane_s32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); 
+ + expected.val[0] = simde_vld1_s32(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s32(test_vec[i].r[1]); + expected.val[2] = simde_vld1_s32(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i32x2(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t src[3][1]; + int64_t buf[3]; + int64_t r[3][1]; + } test_vec[] = { + { { { -INT64_C(41604193) }, + { -INT64_C(22070552) }, + { INT64_C(139589) } }, + { -INT64_C(18919378), -INT64_C(10176256), INT64_C(1594941)}, + { { -INT64_C(18919378) }, + { -INT64_C(10176256) }, + { INT64_C(1594941) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1x3_t r, src, expected; + src.val[0] = simde_vld1_s64(test_vec[i].src[0]); + src.val[1] = simde_vld1_s64(test_vec[i].src[1]); + src.val[2] = simde_vld1_s64(test_vec[i].src[2]); + + r = simde_vld3_lane_s64(test_vec[i].buf, src, 0); + + expected.val[0] = simde_vld1_s64(test_vec[i].r[0]); + expected.val[1] = simde_vld1_s64(test_vec[i].r[1]); + expected.val[2] = simde_vld1_s64(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i64x1(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t src[3][8]; + uint8_t buf[3]; + uint8_t r[3][8]; + } test_vec[] = { + { { { UINT8_C(75), UINT8_C(35), UINT8_C(26), UINT8_C(54), + UINT8_C(1), UINT8_C(45), UINT8_C(13), UINT8_C(16) }, + { UINT8_C(71), UINT8_C(47), UINT8_C(17), UINT8_C(80), + UINT8_C(66), UINT8_C(34), UINT8_C(97), UINT8_C(20) }, + { UINT8_C(41), UINT8_C(28), UINT8_C(70), UINT8_C(58), + UINT8_C(81), UINT8_C(47), UINT8_C(7), 
UINT8_C(80) } }, + { UINT8_C(92), UINT8_C(95), UINT8_C(50)}, + { { UINT8_C(92), UINT8_C(35), UINT8_C(26), UINT8_C(54), + UINT8_C(1), UINT8_C(45), UINT8_C(13), UINT8_C(16) }, + { UINT8_C(95), UINT8_C(47), UINT8_C(17), UINT8_C(80), + UINT8_C(66), UINT8_C(34), UINT8_C(97), UINT8_C(20) }, + { UINT8_C(50), UINT8_C(28), UINT8_C(70), UINT8_C(58), + UINT8_C(81), UINT8_C(47), UINT8_C(7), UINT8_C(80) } } }, + { { { UINT8_C(42), UINT8_C(96), UINT8_C(15), UINT8_C(5), + UINT8_C(54), UINT8_C(1), UINT8_C(68), UINT8_C(93) }, + { UINT8_C(5), UINT8_C(62), UINT8_C(9), UINT8_C(30), + UINT8_C(18), UINT8_C(45), UINT8_C(2), UINT8_C(28) }, + { UINT8_C(65), UINT8_C(82), UINT8_C(30), UINT8_C(37), + UINT8_C(18), UINT8_C(53), UINT8_C(59), UINT8_C(73) } }, + { UINT8_C(45), UINT8_C(63), UINT8_C(47)}, + { { UINT8_C(42), UINT8_C(45), UINT8_C(15), UINT8_C(5), + UINT8_C(54), UINT8_C(1), UINT8_C(68), UINT8_C(93) }, + { UINT8_C(5), UINT8_C(63), UINT8_C(9), UINT8_C(30), + UINT8_C(18), UINT8_C(45), UINT8_C(2), UINT8_C(28) }, + { UINT8_C(65), UINT8_C(47), UINT8_C(30), UINT8_C(37), + UINT8_C(18), UINT8_C(53), UINT8_C(59), UINT8_C(73) } } }, + { { { UINT8_C(5), UINT8_C(76), UINT8_C(80), UINT8_C(97), + UINT8_C(33), UINT8_C(72), UINT8_C(43), UINT8_C(79) }, + { UINT8_C(75), UINT8_C(82), UINT8_C(0), UINT8_C(67), + UINT8_C(37), UINT8_C(5), UINT8_C(72), UINT8_C(27) }, + { UINT8_C(1), UINT8_C(63), UINT8_C(20), UINT8_C(19), + UINT8_C(83), UINT8_C(8), UINT8_C(44), UINT8_C(57) } }, + { UINT8_C(63), UINT8_C(85), UINT8_C(98)}, + { { UINT8_C(5), UINT8_C(76), UINT8_C(63), UINT8_C(97), + UINT8_C(33), UINT8_C(72), UINT8_C(43), UINT8_C(79) }, + { UINT8_C(75), UINT8_C(82), UINT8_C(85), UINT8_C(67), + UINT8_C(37), UINT8_C(5), UINT8_C(72), UINT8_C(27) }, + { UINT8_C(1), UINT8_C(63), UINT8_C(98), UINT8_C(19), + UINT8_C(83), UINT8_C(8), UINT8_C(44), UINT8_C(57) } } }, + { { { UINT8_C(72), UINT8_C(56), UINT8_C(65), UINT8_C(46), + UINT8_C(4), UINT8_C(16), UINT8_C(78), UINT8_C(1) }, + { UINT8_C(18), UINT8_C(48), UINT8_C(95), 
UINT8_C(22), + UINT8_C(70), UINT8_C(44), UINT8_C(38), UINT8_C(3) }, + { UINT8_C(23), UINT8_C(7), UINT8_C(81), UINT8_C(1), + UINT8_C(25), UINT8_C(17), UINT8_C(17), UINT8_C(41) } }, + { UINT8_C(60), UINT8_C(43), UINT8_C(32)}, + { { UINT8_C(72), UINT8_C(56), UINT8_C(65), UINT8_C(60), + UINT8_C(4), UINT8_C(16), UINT8_C(78), UINT8_C(1) }, + { UINT8_C(18), UINT8_C(48), UINT8_C(95), UINT8_C(43), + UINT8_C(70), UINT8_C(44), UINT8_C(38), UINT8_C(3) }, + { UINT8_C(23), UINT8_C(7), UINT8_C(81), UINT8_C(32), + UINT8_C(25), UINT8_C(17), UINT8_C(17), UINT8_C(41) } } }, + { { { UINT8_C(90), UINT8_C(17), UINT8_C(65), UINT8_C(25), + UINT8_C(72), UINT8_C(23), UINT8_C(54), UINT8_C(47) }, + { UINT8_C(59), UINT8_C(82), UINT8_C(35), UINT8_C(91), + UINT8_C(46), UINT8_C(26), UINT8_C(40), UINT8_C(79) }, + { UINT8_C(73), UINT8_C(71), UINT8_C(80), UINT8_C(13), + UINT8_C(88), UINT8_C(62), UINT8_C(41), UINT8_C(0) } }, + { UINT8_C(86), UINT8_C(2), UINT8_C(23)}, + { { UINT8_C(90), UINT8_C(17), UINT8_C(65), UINT8_C(25), + UINT8_C(86), UINT8_C(23), UINT8_C(54), UINT8_C(47) }, + { UINT8_C(59), UINT8_C(82), UINT8_C(35), UINT8_C(91), + UINT8_C(2), UINT8_C(26), UINT8_C(40), UINT8_C(79) }, + { UINT8_C(73), UINT8_C(71), UINT8_C(80), UINT8_C(13), + UINT8_C(23), UINT8_C(62), UINT8_C(41), UINT8_C(0) } } }, + { { { UINT8_C(9), UINT8_C(4), UINT8_C(48), UINT8_C(49), + UINT8_C(13), UINT8_C(76), UINT8_C(28), UINT8_C(38) }, + { UINT8_C(80), UINT8_C(15), UINT8_C(59), UINT8_C(29), + UINT8_C(83), UINT8_C(69), UINT8_C(55), UINT8_C(2) }, + { UINT8_C(12), UINT8_C(53), UINT8_C(98), UINT8_C(65), + UINT8_C(68), UINT8_C(99), UINT8_C(58), UINT8_C(18) } }, + { UINT8_C(49), UINT8_C(77), UINT8_C(71)}, + { { UINT8_C(9), UINT8_C(4), UINT8_C(48), UINT8_C(49), + UINT8_C(13), UINT8_C(49), UINT8_C(28), UINT8_C(38) }, + { UINT8_C(80), UINT8_C(15), UINT8_C(59), UINT8_C(29), + UINT8_C(83), UINT8_C(77), UINT8_C(55), UINT8_C(2) }, + { UINT8_C(12), UINT8_C(53), UINT8_C(98), UINT8_C(65), + UINT8_C(68), UINT8_C(71), UINT8_C(58), 
UINT8_C(18) } } }, + { { { UINT8_C(21), UINT8_C(55), UINT8_C(12), UINT8_C(49), + UINT8_C(97), UINT8_C(12), UINT8_C(16), UINT8_C(51) }, + { UINT8_C(90), UINT8_C(34), UINT8_C(48), UINT8_C(61), + UINT8_C(90), UINT8_C(4), UINT8_C(74), UINT8_C(53) }, + { UINT8_C(44), UINT8_C(38), UINT8_C(5), UINT8_C(81), + UINT8_C(71), UINT8_C(12), UINT8_C(0), UINT8_C(21) } }, + { UINT8_C(61), UINT8_C(40), UINT8_C(87)}, + { { UINT8_C(21), UINT8_C(55), UINT8_C(12), UINT8_C(49), + UINT8_C(97), UINT8_C(12), UINT8_C(61), UINT8_C(51) }, + { UINT8_C(90), UINT8_C(34), UINT8_C(48), UINT8_C(61), + UINT8_C(90), UINT8_C(4), UINT8_C(40), UINT8_C(53) }, + { UINT8_C(44), UINT8_C(38), UINT8_C(5), UINT8_C(81), + UINT8_C(71), UINT8_C(12), UINT8_C(87), UINT8_C(21) } } }, + { { { UINT8_C(95), UINT8_C(70), UINT8_C(79), UINT8_C(1), + UINT8_C(27), UINT8_C(95), UINT8_C(83), UINT8_C(31) }, + { UINT8_C(45), UINT8_C(24), UINT8_C(38), UINT8_C(76), + UINT8_C(85), UINT8_C(61), UINT8_C(59), UINT8_C(71) }, + { UINT8_C(7), UINT8_C(7), UINT8_C(22), UINT8_C(38), + UINT8_C(73), UINT8_C(72), UINT8_C(79), UINT8_C(43) } }, + { UINT8_C(9), UINT8_C(96), UINT8_C(17)}, + { { UINT8_C(95), UINT8_C(70), UINT8_C(79), UINT8_C(1), + UINT8_C(27), UINT8_C(95), UINT8_C(83), UINT8_C(9) }, + { UINT8_C(45), UINT8_C(24), UINT8_C(38), UINT8_C(76), + UINT8_C(85), UINT8_C(61), UINT8_C(59), UINT8_C(96) }, + { UINT8_C(7), UINT8_C(7), UINT8_C(22), UINT8_C(38), + UINT8_C(73), UINT8_C(72), UINT8_C(79), UINT8_C(17) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8x3_t r, src, expected; + src.val[0] = simde_vld1_u8(test_vec[i].src[0]); + src.val[1] = simde_vld1_u8(test_vec[i].src[1]); + src.val[2] = simde_vld1_u8(test_vec[i].src[2]); + + SIMDE_CONSTIFY_8_(simde_vld3_lane_u8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_u8(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u8(test_vec[i].r[1]); + expected.val[2] = simde_vld1_u8(test_vec[i].r[2]); + + 
simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u8x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t src[3][4]; + uint16_t buf[3]; + uint16_t r[3][4]; + } test_vec[] = { + { { { UINT16_C(6015), UINT16_C(2114), UINT16_C(9985), UINT16_C(6732) }, + { UINT16_C(8774), UINT16_C(5943), UINT16_C(9952), UINT16_C(6251) }, + { UINT16_C(7677), UINT16_C(4431), UINT16_C(8936), UINT16_C(5217) } }, + { UINT16_C(6123), UINT16_C(8122), UINT16_C(6028)}, + { { UINT16_C(6123), UINT16_C(2114), UINT16_C(9985), UINT16_C(6732) }, + { UINT16_C(8122), UINT16_C(5943), UINT16_C(9952), UINT16_C(6251) }, + { UINT16_C(6028), UINT16_C(4431), UINT16_C(8936), UINT16_C(5217) } } }, + { { { UINT16_C(1502), UINT16_C(4900), UINT16_C(3595), UINT16_C(5262) }, + { UINT16_C(8144), UINT16_C(1060), UINT16_C(6932), UINT16_C(5912) }, + { UINT16_C(6252), UINT16_C(4564), UINT16_C(7952), UINT16_C(4773) } }, + { UINT16_C(8849), UINT16_C(6285), UINT16_C(845)}, + { { UINT16_C(1502), UINT16_C(8849), UINT16_C(3595), UINT16_C(5262) }, + { UINT16_C(8144), UINT16_C(6285), UINT16_C(6932), UINT16_C(5912) }, + { UINT16_C(6252), UINT16_C(845), UINT16_C(7952), UINT16_C(4773) } } }, + { { { UINT16_C(8789), UINT16_C(6109), UINT16_C(2590), UINT16_C(503) }, + { UINT16_C(5135), UINT16_C(1328), UINT16_C(5106), UINT16_C(3566) }, + { UINT16_C(6125), UINT16_C(5630), UINT16_C(1661), UINT16_C(5502) } }, + { UINT16_C(7329), UINT16_C(958), UINT16_C(9522)}, + { { UINT16_C(8789), UINT16_C(6109), UINT16_C(7329), UINT16_C(503) }, + { UINT16_C(5135), UINT16_C(1328), UINT16_C(958), UINT16_C(3566) }, + { UINT16_C(6125), UINT16_C(5630), UINT16_C(9522), UINT16_C(5502) } } }, + { { { UINT16_C(237), UINT16_C(4662), UINT16_C(2239), UINT16_C(7486) }, + { UINT16_C(9269), UINT16_C(4261), UINT16_C(1015), UINT16_C(4021) }, + { 
UINT16_C(8255), UINT16_C(239), UINT16_C(9693), UINT16_C(2843) } }, + { UINT16_C(2785), UINT16_C(9518), UINT16_C(5622)}, + { { UINT16_C(237), UINT16_C(4662), UINT16_C(2239), UINT16_C(2785) }, + { UINT16_C(9269), UINT16_C(4261), UINT16_C(1015), UINT16_C(9518) }, + { UINT16_C(8255), UINT16_C(239), UINT16_C(9693), UINT16_C(5622) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4x3_t r, src, expected; + src.val[0] = simde_vld1_u16(test_vec[i].src[0]); + src.val[1] = simde_vld1_u16(test_vec[i].src[1]); + src.val[2] = simde_vld1_u16(test_vec[i].src[2]); + + SIMDE_CONSTIFY_4_(simde_vld3_lane_u16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_u16(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u16(test_vec[i].r[1]); + expected.val[2] = simde_vld1_u16(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u16x4(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t src[3][2]; + uint32_t buf[3]; + uint32_t r[3][2]; + } test_vec[] = { + { { { UINT32_C(649331), UINT32_C(837601) }, + { UINT32_C(485878), UINT32_C(933696) }, + { UINT32_C(256110), UINT32_C(394676) } }, + { UINT32_C(547466), UINT32_C(729971), UINT32_C(327258)}, + { { UINT32_C(547466), UINT32_C(837601) }, + { UINT32_C(729971), UINT32_C(933696) }, + { UINT32_C(327258), UINT32_C(394676) } } }, + { { { UINT32_C(296079), UINT32_C(480957) }, + { UINT32_C(878808), UINT32_C(469556) }, + { UINT32_C(544354), UINT32_C(170596) } }, + { UINT32_C(663329), UINT32_C(622760), UINT32_C(266353)}, + { { UINT32_C(296079), UINT32_C(663329) }, + { UINT32_C(878808), UINT32_C(622760) }, + { UINT32_C(544354), UINT32_C(266353) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) 
{ + simde_uint32x2x3_t r, src, expected; + src.val[0] = simde_vld1_u32(test_vec[i].src[0]); + src.val[1] = simde_vld1_u32(test_vec[i].src[1]); + src.val[2] = simde_vld1_u32(test_vec[i].src[2]); + + SIMDE_CONSTIFY_2_(simde_vld3_lane_u32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_u32(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u32(test_vec[i].r[1]); + expected.val[2] = simde_vld1_u32(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u32x2(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t src[3][1]; + uint64_t buf[3]; + uint64_t r[3][1]; + } test_vec[] = { + { { { UINT64_C(54243463) }, + { UINT64_C(32138549) }, + { UINT64_C(65147451) } }, + { UINT64_C(36001229), UINT64_C(4492888), UINT64_C(79288036)}, + { { UINT64_C(36001229) }, + { UINT64_C(4492888) }, + { UINT64_C(79288036) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1x3_t r, src, expected; + src.val[0] = simde_vld1_u64(test_vec[i].src[0]); + src.val[1] = simde_vld1_u64(test_vec[i].src[1]); + src.val[2] = simde_vld1_u64(test_vec[i].src[2]); + + r = simde_vld3_lane_u64(test_vec[i].buf, src, 0); + + expected.val[0] = simde_vld1_u64(test_vec[i].r[0]); + expected.val[1] = simde_vld1_u64(test_vec[i].r[1]); + expected.val[2] = simde_vld1_u64(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u64x1(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[3][4]; + simde_float16_t buf[3]; + simde_float16_t r[3][4]; + } test_vec[] = { + { { { 
SIMDE_FLOAT16_VALUE(-38.09), SIMDE_FLOAT16_VALUE(39.56), SIMDE_FLOAT16_VALUE(12.85), SIMDE_FLOAT16_VALUE(-6.63) }, + { SIMDE_FLOAT16_VALUE(-41.59), SIMDE_FLOAT16_VALUE(46.59), SIMDE_FLOAT16_VALUE(22.79), SIMDE_FLOAT16_VALUE(-13.35) }, + { SIMDE_FLOAT16_VALUE(25.23), SIMDE_FLOAT16_VALUE(-26.59), SIMDE_FLOAT16_VALUE(19.00), SIMDE_FLOAT16_VALUE(-16.99) } }, + { SIMDE_FLOAT16_VALUE(6.58), SIMDE_FLOAT16_VALUE(-23.21), SIMDE_FLOAT16_VALUE(-16.75)}, + { { SIMDE_FLOAT16_VALUE(6.58), SIMDE_FLOAT16_VALUE(39.56), SIMDE_FLOAT16_VALUE(12.85), SIMDE_FLOAT16_VALUE(-6.63) }, + { SIMDE_FLOAT16_VALUE(-23.21), SIMDE_FLOAT16_VALUE(46.59), SIMDE_FLOAT16_VALUE(22.79), SIMDE_FLOAT16_VALUE(-13.35) }, + { SIMDE_FLOAT16_VALUE(-16.75), SIMDE_FLOAT16_VALUE(-26.59), SIMDE_FLOAT16_VALUE(19.00), SIMDE_FLOAT16_VALUE(-16.99) } } }, + { { { SIMDE_FLOAT16_VALUE(-31.46), SIMDE_FLOAT16_VALUE(30.50), SIMDE_FLOAT16_VALUE(-31.78), SIMDE_FLOAT16_VALUE(47.74) }, + { SIMDE_FLOAT16_VALUE(33.97), SIMDE_FLOAT16_VALUE(37.16), SIMDE_FLOAT16_VALUE(27.13), SIMDE_FLOAT16_VALUE(-28.58) }, + { SIMDE_FLOAT16_VALUE(-7.95), SIMDE_FLOAT16_VALUE(-26.73), SIMDE_FLOAT16_VALUE(17.08), SIMDE_FLOAT16_VALUE(39.81) } }, + { SIMDE_FLOAT16_VALUE(-28.19), SIMDE_FLOAT16_VALUE(29.28), SIMDE_FLOAT16_VALUE(-1.21)}, + { { SIMDE_FLOAT16_VALUE(-31.46), SIMDE_FLOAT16_VALUE(-28.19), SIMDE_FLOAT16_VALUE(-31.78), SIMDE_FLOAT16_VALUE(47.74) }, + { SIMDE_FLOAT16_VALUE(33.97), SIMDE_FLOAT16_VALUE(29.28), SIMDE_FLOAT16_VALUE(27.13), SIMDE_FLOAT16_VALUE(-28.58) }, + { SIMDE_FLOAT16_VALUE(-7.95), SIMDE_FLOAT16_VALUE(-1.21), SIMDE_FLOAT16_VALUE(17.08), SIMDE_FLOAT16_VALUE(39.81) } } }, + { { { SIMDE_FLOAT16_VALUE(42.99), SIMDE_FLOAT16_VALUE(-22.57), SIMDE_FLOAT16_VALUE(-7.51), SIMDE_FLOAT16_VALUE(-37.37) }, + { SIMDE_FLOAT16_VALUE(15.49), SIMDE_FLOAT16_VALUE(-14.01), SIMDE_FLOAT16_VALUE(30.43), SIMDE_FLOAT16_VALUE(-22.10) }, + { SIMDE_FLOAT16_VALUE(-17.08), SIMDE_FLOAT16_VALUE(28.40), SIMDE_FLOAT16_VALUE(-3.15), SIMDE_FLOAT16_VALUE(-9.72) } }, + { 
SIMDE_FLOAT16_VALUE(37.03), SIMDE_FLOAT16_VALUE(-46.18), SIMDE_FLOAT16_VALUE(30.55)}, + { { SIMDE_FLOAT16_VALUE(42.99), SIMDE_FLOAT16_VALUE(-22.57), SIMDE_FLOAT16_VALUE(37.03), SIMDE_FLOAT16_VALUE(-37.37) }, + { SIMDE_FLOAT16_VALUE(15.49), SIMDE_FLOAT16_VALUE(-14.01), SIMDE_FLOAT16_VALUE(-46.18), SIMDE_FLOAT16_VALUE(-22.10) }, + { SIMDE_FLOAT16_VALUE(-17.08), SIMDE_FLOAT16_VALUE(28.40), SIMDE_FLOAT16_VALUE(30.55), SIMDE_FLOAT16_VALUE(-9.72) } } }, + { { { SIMDE_FLOAT16_VALUE(-27.48), SIMDE_FLOAT16_VALUE(13.58), SIMDE_FLOAT16_VALUE(-24.04), SIMDE_FLOAT16_VALUE(4.66) }, + { SIMDE_FLOAT16_VALUE(29.23), SIMDE_FLOAT16_VALUE(31.79), SIMDE_FLOAT16_VALUE(-45.75), SIMDE_FLOAT16_VALUE(38.47) }, + { SIMDE_FLOAT16_VALUE(-41.39), SIMDE_FLOAT16_VALUE(-33.55), SIMDE_FLOAT16_VALUE(-46.68), SIMDE_FLOAT16_VALUE(19.26) } }, + { SIMDE_FLOAT16_VALUE(39.57), SIMDE_FLOAT16_VALUE(-22.13), SIMDE_FLOAT16_VALUE(6.04)}, + { { SIMDE_FLOAT16_VALUE(-27.48), SIMDE_FLOAT16_VALUE(13.58), SIMDE_FLOAT16_VALUE(-24.04), SIMDE_FLOAT16_VALUE(39.57) }, + { SIMDE_FLOAT16_VALUE(29.23), SIMDE_FLOAT16_VALUE(31.79), SIMDE_FLOAT16_VALUE(-45.75), SIMDE_FLOAT16_VALUE(-22.13) }, + { SIMDE_FLOAT16_VALUE(-41.39), SIMDE_FLOAT16_VALUE(-33.55), SIMDE_FLOAT16_VALUE(-46.68), SIMDE_FLOAT16_VALUE(6.04) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x3_t r, src, expected; + src.val[0] = simde_vld1_f16(test_vec[i].src[0]); + src.val[1] = simde_vld1_f16(test_vec[i].src[1]); + src.val[2] = simde_vld1_f16(test_vec[i].src[2]); + + SIMDE_CONSTIFY_4_(simde_vld3_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_f16(test_vec[i].r[0]); + expected.val[1] = simde_vld1_f16(test_vec[i].r[1]); + expected.val[2] = simde_vld1_f16(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + 
simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t src[3][2]; + simde_float32_t buf[3]; + simde_float32_t r[3][2]; + } test_vec[] = { + { { { SIMDE_FLOAT32_C(-3815.63), SIMDE_FLOAT32_C(-1707.02) }, + { SIMDE_FLOAT32_C(1574.07), SIMDE_FLOAT32_C(887.99) }, + { SIMDE_FLOAT32_C(2512.31), SIMDE_FLOAT32_C(4889.59) } }, + { SIMDE_FLOAT32_C(-215.06), SIMDE_FLOAT32_C(-3372.22), SIMDE_FLOAT32_C(-242.19)}, + { { SIMDE_FLOAT32_C(-215.06), SIMDE_FLOAT32_C(-1707.02) }, + { SIMDE_FLOAT32_C(-3372.22), SIMDE_FLOAT32_C(887.99) }, + { SIMDE_FLOAT32_C(-242.19), SIMDE_FLOAT32_C(4889.59) } } }, + { { { SIMDE_FLOAT32_C(-514.65), SIMDE_FLOAT32_C(3958.55) }, + { SIMDE_FLOAT32_C(-1426.95), SIMDE_FLOAT32_C(1871.07) }, + { SIMDE_FLOAT32_C(-3767.33), SIMDE_FLOAT32_C(2972.99) } }, + { SIMDE_FLOAT32_C(-2565.02), SIMDE_FLOAT32_C(-4895.28), SIMDE_FLOAT32_C(2142.44)}, + { { SIMDE_FLOAT32_C(-514.65), SIMDE_FLOAT32_C(-2565.02) }, + { SIMDE_FLOAT32_C(-1426.95), SIMDE_FLOAT32_C(-4895.28) }, + { SIMDE_FLOAT32_C(-3767.33), SIMDE_FLOAT32_C(2142.44) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2x3_t r, src, expected; + src.val[0] = simde_vld1_f32(test_vec[i].src[0]); + src.val[1] = simde_vld1_f32(test_vec[i].src[1]); + src.val[2] = simde_vld1_f32(test_vec[i].src[2]); + + SIMDE_CONSTIFY_2_(simde_vld3_lane_f32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1_f32(test_vec[i].r[0]); + expected.val[1] = simde_vld1_f32(test_vec[i].r[1]); + expected.val[2] = simde_vld1_f32(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int 
+test_simde_vld3_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t src[3][1]; + simde_float64_t buf[3]; + simde_float64_t r[3][1]; + } test_vec[] = { + { { { SIMDE_FLOAT64_C(-227977.76) }, + { SIMDE_FLOAT64_C(-358189.91) }, + { SIMDE_FLOAT64_C(329725.67) } }, + { SIMDE_FLOAT64_C(-308263.36), SIMDE_FLOAT64_C(134133.66), SIMDE_FLOAT64_C(344837.21)}, + { { SIMDE_FLOAT64_C(-308263.36) }, + { SIMDE_FLOAT64_C(134133.66) }, + { SIMDE_FLOAT64_C(344837.21) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1x3_t r, src, expected; + src.val[0] = simde_vld1_f64(test_vec[i].src[0]); + src.val[1] = simde_vld1_f64(test_vec[i].src[1]); + src.val[2] = simde_vld1_f64(test_vec[i].src[2]); + + r = simde_vld3_lane_f64(test_vec[i].buf, src, 0); + + expected.val[0] = simde_vld1_f64(test_vec[i].r[0]); + expected.val[1] = simde_vld1_f64(test_vec[i].r[1]); + expected.val[2] = simde_vld1_f64(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3q_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t src[3][16]; + int8_t buf[3]; + int8_t r[3][16]; + } test_vec[] = { + { { { -INT8_C(28), -INT8_C(13), -INT8_C(35), INT8_C(39), -INT8_C(35), INT8_C(35), -INT8_C(17), INT8_C(11), + INT8_C(40), -INT8_C(11), -INT8_C(11), INT8_C(24), INT8_C(42), INT8_C(33), INT8_C(15), -INT8_C(30) }, + { -INT8_C(10), -INT8_C(12), INT8_C(18), -INT8_C(9), INT8_C(27), -INT8_C(30), -INT8_C(28), INT8_C(43), + INT8_C(13), -INT8_C(3), INT8_C(33), INT8_C(34), INT8_C(27), INT8_C(9), -INT8_C(23), INT8_C(48) }, + { -INT8_C(7), -INT8_C(13), INT8_C(19), INT8_C(4), INT8_C(17), -INT8_C(17), -INT8_C(10), -INT8_C(24), + INT8_C(6), INT8_C(1), INT8_C(22), -INT8_C(47), -INT8_C(47), INT8_C(0), 
INT8_C(13), INT8_C(5) } }, + { -INT8_C(31), -INT8_C(4), INT8_C(11)}, + { { -INT8_C(31), -INT8_C(13), -INT8_C(35), INT8_C(39), -INT8_C(35), INT8_C(35), -INT8_C(17), INT8_C(11), + INT8_C(40), -INT8_C(11), -INT8_C(11), INT8_C(24), INT8_C(42), INT8_C(33), INT8_C(15), -INT8_C(30) }, + { -INT8_C(4), -INT8_C(12), INT8_C(18), -INT8_C(9), INT8_C(27), -INT8_C(30), -INT8_C(28), INT8_C(43), + INT8_C(13), -INT8_C(3), INT8_C(33), INT8_C(34), INT8_C(27), INT8_C(9), -INT8_C(23), INT8_C(48) }, + { INT8_C(11), -INT8_C(13), INT8_C(19), INT8_C(4), INT8_C(17), -INT8_C(17), -INT8_C(10), -INT8_C(24), + INT8_C(6), INT8_C(1), INT8_C(22), -INT8_C(47), -INT8_C(47), INT8_C(0), INT8_C(13), INT8_C(5) } } }, + { { { -INT8_C(30), INT8_C(25), -INT8_C(8), -INT8_C(18), INT8_C(19), INT8_C(15), INT8_C(31), -INT8_C(19), + -INT8_C(2), -INT8_C(45), INT8_C(24), -INT8_C(25), -INT8_C(20), INT8_C(34), INT8_C(37), INT8_C(38) }, + { INT8_C(2), -INT8_C(4), -INT8_C(49), INT8_C(44), -INT8_C(26), INT8_C(24), -INT8_C(40), INT8_C(3), + -INT8_C(38), INT8_C(5), INT8_C(45), -INT8_C(4), -INT8_C(21), -INT8_C(40), INT8_C(49), -INT8_C(33) }, + { -INT8_C(42), INT8_C(29), -INT8_C(24), INT8_C(16), INT8_C(25), INT8_C(4), INT8_C(13), -INT8_C(24), + INT8_C(10), -INT8_C(25), INT8_C(13), -INT8_C(3), INT8_C(36), -INT8_C(44), -INT8_C(1), -INT8_C(33) } }, + { INT8_C(46), INT8_C(42), -INT8_C(17)}, + { { -INT8_C(30), INT8_C(46), -INT8_C(8), -INT8_C(18), INT8_C(19), INT8_C(15), INT8_C(31), -INT8_C(19), + -INT8_C(2), -INT8_C(45), INT8_C(24), -INT8_C(25), -INT8_C(20), INT8_C(34), INT8_C(37), INT8_C(38) }, + { INT8_C(2), INT8_C(42), -INT8_C(49), INT8_C(44), -INT8_C(26), INT8_C(24), -INT8_C(40), INT8_C(3), + -INT8_C(38), INT8_C(5), INT8_C(45), -INT8_C(4), -INT8_C(21), -INT8_C(40), INT8_C(49), -INT8_C(33) }, + { -INT8_C(42), -INT8_C(17), -INT8_C(24), INT8_C(16), INT8_C(25), INT8_C(4), INT8_C(13), -INT8_C(24), + INT8_C(10), -INT8_C(25), INT8_C(13), -INT8_C(3), INT8_C(36), -INT8_C(44), -INT8_C(1), -INT8_C(33) } } }, + { { { INT8_C(15), 
-INT8_C(42), -INT8_C(16), INT8_C(46), -INT8_C(23), INT8_C(33), -INT8_C(21), -INT8_C(49), + INT8_C(5), INT8_C(47), INT8_C(42), INT8_C(19), -INT8_C(24), -INT8_C(13), -INT8_C(36), -INT8_C(34) }, + { -INT8_C(48), INT8_C(19), INT8_C(41), INT8_C(8), INT8_C(14), -INT8_C(1), -INT8_C(48), INT8_C(31), + INT8_C(40), -INT8_C(3), INT8_C(9), -INT8_C(21), INT8_C(21), -INT8_C(3), INT8_C(2), -INT8_C(47) }, + { INT8_C(24), INT8_C(8), -INT8_C(1), INT8_C(37), INT8_C(18), INT8_C(37), -INT8_C(27), INT8_C(40), + -INT8_C(42), INT8_C(42), -INT8_C(30), INT8_C(0), INT8_C(45), -INT8_C(6), INT8_C(39), INT8_C(45) } }, + { INT8_C(23), INT8_C(20), -INT8_C(12)}, + { { INT8_C(15), -INT8_C(42), INT8_C(23), INT8_C(46), -INT8_C(23), INT8_C(33), -INT8_C(21), -INT8_C(49), + INT8_C(5), INT8_C(47), INT8_C(42), INT8_C(19), -INT8_C(24), -INT8_C(13), -INT8_C(36), -INT8_C(34) }, + { -INT8_C(48), INT8_C(19), INT8_C(20), INT8_C(8), INT8_C(14), -INT8_C(1), -INT8_C(48), INT8_C(31), + INT8_C(40), -INT8_C(3), INT8_C(9), -INT8_C(21), INT8_C(21), -INT8_C(3), INT8_C(2), -INT8_C(47) }, + { INT8_C(24), INT8_C(8), -INT8_C(12), INT8_C(37), INT8_C(18), INT8_C(37), -INT8_C(27), INT8_C(40), + -INT8_C(42), INT8_C(42), -INT8_C(30), INT8_C(0), INT8_C(45), -INT8_C(6), INT8_C(39), INT8_C(45) } } }, + { { { -INT8_C(4), INT8_C(24), INT8_C(10), -INT8_C(19), INT8_C(44), INT8_C(1), INT8_C(1), INT8_C(13), + -INT8_C(19), INT8_C(8), -INT8_C(47), INT8_C(45), INT8_C(21), -INT8_C(16), -INT8_C(46), -INT8_C(32) }, + { INT8_C(5), INT8_C(10), -INT8_C(16), -INT8_C(23), -INT8_C(15), INT8_C(39), INT8_C(1), INT8_C(48), + INT8_C(17), -INT8_C(13), INT8_C(35), INT8_C(11), INT8_C(32), INT8_C(15), INT8_C(11), -INT8_C(25) }, + { -INT8_C(11), INT8_C(29), -INT8_C(23), INT8_C(17), -INT8_C(31), INT8_C(40), INT8_C(34), INT8_C(13), + INT8_C(11), -INT8_C(48), INT8_C(18), INT8_C(19), -INT8_C(21), INT8_C(31), -INT8_C(42), -INT8_C(14) } }, + { -INT8_C(26), INT8_C(0), INT8_C(4)}, + { { -INT8_C(4), INT8_C(24), INT8_C(10), -INT8_C(26), INT8_C(44), INT8_C(1), 
INT8_C(1), INT8_C(13), + -INT8_C(19), INT8_C(8), -INT8_C(47), INT8_C(45), INT8_C(21), -INT8_C(16), -INT8_C(46), -INT8_C(32) }, + { INT8_C(5), INT8_C(10), -INT8_C(16), INT8_C(0), -INT8_C(15), INT8_C(39), INT8_C(1), INT8_C(48), + INT8_C(17), -INT8_C(13), INT8_C(35), INT8_C(11), INT8_C(32), INT8_C(15), INT8_C(11), -INT8_C(25) }, + { -INT8_C(11), INT8_C(29), -INT8_C(23), INT8_C(4), -INT8_C(31), INT8_C(40), INT8_C(34), INT8_C(13), + INT8_C(11), -INT8_C(48), INT8_C(18), INT8_C(19), -INT8_C(21), INT8_C(31), -INT8_C(42), -INT8_C(14) } } }, + { { { INT8_C(44), INT8_C(25), -INT8_C(6), INT8_C(45), INT8_C(14), -INT8_C(38), -INT8_C(31), INT8_C(21), + INT8_C(8), INT8_C(36), -INT8_C(2), -INT8_C(13), INT8_C(16), INT8_C(12), -INT8_C(23), INT8_C(34) }, + { INT8_C(43), INT8_C(24), INT8_C(19), -INT8_C(35), INT8_C(16), INT8_C(10), INT8_C(29), INT8_C(26), + INT8_C(36), -INT8_C(27), INT8_C(47), INT8_C(32), INT8_C(6), -INT8_C(29), INT8_C(30), -INT8_C(30) }, + { -INT8_C(5), -INT8_C(41), INT8_C(19), INT8_C(6), INT8_C(10), INT8_C(11), INT8_C(17), INT8_C(24), + -INT8_C(24), INT8_C(26), INT8_C(35), INT8_C(17), -INT8_C(12), -INT8_C(38), -INT8_C(22), -INT8_C(30) } }, + { INT8_C(31), -INT8_C(38), INT8_C(8)}, + { { INT8_C(44), INT8_C(25), -INT8_C(6), INT8_C(45), INT8_C(31), -INT8_C(38), -INT8_C(31), INT8_C(21), + INT8_C(8), INT8_C(36), -INT8_C(2), -INT8_C(13), INT8_C(16), INT8_C(12), -INT8_C(23), INT8_C(34) }, + { INT8_C(43), INT8_C(24), INT8_C(19), -INT8_C(35), -INT8_C(38), INT8_C(10), INT8_C(29), INT8_C(26), + INT8_C(36), -INT8_C(27), INT8_C(47), INT8_C(32), INT8_C(6), -INT8_C(29), INT8_C(30), -INT8_C(30) }, + { -INT8_C(5), -INT8_C(41), INT8_C(19), INT8_C(6), INT8_C(8), INT8_C(11), INT8_C(17), INT8_C(24), + -INT8_C(24), INT8_C(26), INT8_C(35), INT8_C(17), -INT8_C(12), -INT8_C(38), -INT8_C(22), -INT8_C(30) } } }, + { { { -INT8_C(29), -INT8_C(16), INT8_C(26), INT8_C(17), INT8_C(2), -INT8_C(4), INT8_C(3), -INT8_C(28), + INT8_C(0), INT8_C(15), INT8_C(44), -INT8_C(43), -INT8_C(1), INT8_C(11), 
-INT8_C(5), INT8_C(2) }, + { INT8_C(19), -INT8_C(26), -INT8_C(39), INT8_C(43), INT8_C(10), INT8_C(18), -INT8_C(32), INT8_C(42), + -INT8_C(30), -INT8_C(1), -INT8_C(1), INT8_C(22), -INT8_C(34), -INT8_C(11), -INT8_C(40), INT8_C(47) }, + { INT8_C(5), INT8_C(36), INT8_C(0), -INT8_C(6), -INT8_C(46), -INT8_C(23), -INT8_C(48), INT8_C(6), + INT8_C(45), -INT8_C(35), INT8_C(49), INT8_C(19), -INT8_C(32), -INT8_C(32), -INT8_C(44), -INT8_C(5) } }, + { -INT8_C(45), INT8_C(0), INT8_C(44)}, + { { -INT8_C(29), -INT8_C(16), INT8_C(26), INT8_C(17), INT8_C(2), -INT8_C(45), INT8_C(3), -INT8_C(28), + INT8_C(0), INT8_C(15), INT8_C(44), -INT8_C(43), -INT8_C(1), INT8_C(11), -INT8_C(5), INT8_C(2) }, + { INT8_C(19), -INT8_C(26), -INT8_C(39), INT8_C(43), INT8_C(10), INT8_C(0), -INT8_C(32), INT8_C(42), + -INT8_C(30), -INT8_C(1), -INT8_C(1), INT8_C(22), -INT8_C(34), -INT8_C(11), -INT8_C(40), INT8_C(47) }, + { INT8_C(5), INT8_C(36), INT8_C(0), -INT8_C(6), -INT8_C(46), INT8_C(44), -INT8_C(48), INT8_C(6), + INT8_C(45), -INT8_C(35), INT8_C(49), INT8_C(19), -INT8_C(32), -INT8_C(32), -INT8_C(44), -INT8_C(5) } } }, + { { { -INT8_C(22), -INT8_C(22), INT8_C(14), INT8_C(5), -INT8_C(3), -INT8_C(5), INT8_C(22), -INT8_C(29), + -INT8_C(21), -INT8_C(5), INT8_C(21), -INT8_C(5), INT8_C(6), INT8_C(36), INT8_C(29), -INT8_C(23) }, + { INT8_C(43), -INT8_C(25), INT8_C(21), -INT8_C(25), -INT8_C(18), INT8_C(38), -INT8_C(31), -INT8_C(7), + INT8_C(25), INT8_C(7), INT8_C(39), -INT8_C(35), -INT8_C(2), INT8_C(22), -INT8_C(46), -INT8_C(7) }, + { -INT8_C(49), -INT8_C(16), INT8_C(34), INT8_C(43), INT8_C(38), -INT8_C(5), -INT8_C(8), INT8_C(0), + INT8_C(3), -INT8_C(19), INT8_C(48), INT8_C(42), INT8_C(25), INT8_C(5), INT8_C(25), INT8_C(17) } }, + { INT8_C(2), INT8_C(43), INT8_C(12)}, + { { -INT8_C(22), -INT8_C(22), INT8_C(14), INT8_C(5), -INT8_C(3), -INT8_C(5), INT8_C(2), -INT8_C(29), + -INT8_C(21), -INT8_C(5), INT8_C(21), -INT8_C(5), INT8_C(6), INT8_C(36), INT8_C(29), -INT8_C(23) }, + { INT8_C(43), -INT8_C(25), INT8_C(21), 
-INT8_C(25), -INT8_C(18), INT8_C(38), INT8_C(43), -INT8_C(7), + INT8_C(25), INT8_C(7), INT8_C(39), -INT8_C(35), -INT8_C(2), INT8_C(22), -INT8_C(46), -INT8_C(7) }, + { -INT8_C(49), -INT8_C(16), INT8_C(34), INT8_C(43), INT8_C(38), -INT8_C(5), INT8_C(12), INT8_C(0), + INT8_C(3), -INT8_C(19), INT8_C(48), INT8_C(42), INT8_C(25), INT8_C(5), INT8_C(25), INT8_C(17) } } }, + { { { -INT8_C(24), INT8_C(26), -INT8_C(29), -INT8_C(11), INT8_C(49), -INT8_C(23), -INT8_C(40), -INT8_C(12), + -INT8_C(26), INT8_C(39), -INT8_C(8), INT8_C(5), INT8_C(11), INT8_C(35), -INT8_C(35), -INT8_C(21) }, + { INT8_C(43), INT8_C(15), INT8_C(44), -INT8_C(25), -INT8_C(35), -INT8_C(16), -INT8_C(6), INT8_C(6), + INT8_C(27), -INT8_C(17), -INT8_C(21), INT8_C(22), INT8_C(8), INT8_C(35), INT8_C(10), INT8_C(18) }, + { -INT8_C(21), INT8_C(7), INT8_C(41), -INT8_C(40), INT8_C(22), -INT8_C(13), -INT8_C(7), -INT8_C(16), + INT8_C(30), INT8_C(48), INT8_C(10), INT8_C(30), -INT8_C(9), INT8_C(43), -INT8_C(16), INT8_C(39) } }, + { -INT8_C(4), INT8_C(38), INT8_C(2)}, + { { -INT8_C(24), INT8_C(26), -INT8_C(29), -INT8_C(11), INT8_C(49), -INT8_C(23), -INT8_C(40), -INT8_C(4), + -INT8_C(26), INT8_C(39), -INT8_C(8), INT8_C(5), INT8_C(11), INT8_C(35), -INT8_C(35), -INT8_C(21) }, + { INT8_C(43), INT8_C(15), INT8_C(44), -INT8_C(25), -INT8_C(35), -INT8_C(16), -INT8_C(6), INT8_C(38), + INT8_C(27), -INT8_C(17), -INT8_C(21), INT8_C(22), INT8_C(8), INT8_C(35), INT8_C(10), INT8_C(18) }, + { -INT8_C(21), INT8_C(7), INT8_C(41), -INT8_C(40), INT8_C(22), -INT8_C(13), -INT8_C(7), INT8_C(2), + INT8_C(30), INT8_C(48), INT8_C(10), INT8_C(30), -INT8_C(9), INT8_C(43), -INT8_C(16), INT8_C(39) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16x3_t r, src, expected; + src.val[0] = simde_vld1q_s8(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s8(test_vec[i].src[1]); + src.val[2] = simde_vld1q_s8(test_vec[i].src[2]); + + SIMDE_CONSTIFY_16_(simde_vld3q_lane_s8, r, (HEDLEY_UNREACHABLE(), r), i, 
test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s8(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s8(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_s8(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x16(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t src[3][8]; + int16_t buf[3]; + int16_t r[3][8]; + } test_vec[] = { + { { { -INT16_C(2544), -INT16_C(4642), INT16_C(1229), -INT16_C(3331), + INT16_C(1420), INT16_C(2297), -INT16_C(3685), -INT16_C(1960) }, + { -INT16_C(1002), -INT16_C(55), INT16_C(253), -INT16_C(3175), + INT16_C(4758), -INT16_C(663), INT16_C(4619), -INT16_C(514) }, + { INT16_C(361), INT16_C(2026), INT16_C(1410), -INT16_C(931), + INT16_C(2343), -INT16_C(2337), INT16_C(1862), -INT16_C(4072) } }, + { INT16_C(3171), INT16_C(4950), -INT16_C(4704)}, + { { INT16_C(3171), -INT16_C(4642), INT16_C(1229), -INT16_C(3331), + INT16_C(1420), INT16_C(2297), -INT16_C(3685), -INT16_C(1960) }, + { INT16_C(4950), -INT16_C(55), INT16_C(253), -INT16_C(3175), + INT16_C(4758), -INT16_C(663), INT16_C(4619), -INT16_C(514) }, + { -INT16_C(4704), INT16_C(2026), INT16_C(1410), -INT16_C(931), + INT16_C(2343), -INT16_C(2337), INT16_C(1862), -INT16_C(4072) } } }, + { { { -INT16_C(2056), -INT16_C(2346), INT16_C(48), -INT16_C(1513), + -INT16_C(1284), -INT16_C(2465), INT16_C(2451), INT16_C(3724) }, + { -INT16_C(2631), -INT16_C(2352), -INT16_C(4669), -INT16_C(3875), + -INT16_C(1356), INT16_C(4686), -INT16_C(1134), INT16_C(4680) }, + { INT16_C(829), -INT16_C(3778), -INT16_C(1514), INT16_C(3181), + INT16_C(233), -INT16_C(285), -INT16_C(2967), INT16_C(4139) } }, + { -INT16_C(761), INT16_C(4943), INT16_C(3163)}, + { { -INT16_C(2056), -INT16_C(761), INT16_C(48), -INT16_C(1513), + -INT16_C(1284), -INT16_C(2465), INT16_C(2451), 
INT16_C(3724) }, + { -INT16_C(2631), INT16_C(4943), -INT16_C(4669), -INT16_C(3875), + -INT16_C(1356), INT16_C(4686), -INT16_C(1134), INT16_C(4680) }, + { INT16_C(829), INT16_C(3163), -INT16_C(1514), INT16_C(3181), + INT16_C(233), -INT16_C(285), -INT16_C(2967), INT16_C(4139) } } }, + { { { -INT16_C(532), -INT16_C(2343), INT16_C(4528), -INT16_C(3432), + INT16_C(2221), INT16_C(3807), -INT16_C(1256), -INT16_C(366) }, + { -INT16_C(3053), -INT16_C(2035), INT16_C(4749), -INT16_C(876), + -INT16_C(3988), -INT16_C(2478), -INT16_C(160), INT16_C(1331) }, + { -INT16_C(4671), -INT16_C(1438), INT16_C(756), -INT16_C(652), + -INT16_C(4517), -INT16_C(1746), INT16_C(849), INT16_C(1421) } }, + { -INT16_C(1857), INT16_C(1429), -INT16_C(481)}, + { { -INT16_C(532), -INT16_C(2343), -INT16_C(1857), -INT16_C(3432), + INT16_C(2221), INT16_C(3807), -INT16_C(1256), -INT16_C(366) }, + { -INT16_C(3053), -INT16_C(2035), INT16_C(1429), -INT16_C(876), + -INT16_C(3988), -INT16_C(2478), -INT16_C(160), INT16_C(1331) }, + { -INT16_C(4671), -INT16_C(1438), -INT16_C(481), -INT16_C(652), + -INT16_C(4517), -INT16_C(1746), INT16_C(849), INT16_C(1421) } } }, + { { { -INT16_C(3293), INT16_C(1799), -INT16_C(2614), INT16_C(2645), + INT16_C(154), INT16_C(1399), INT16_C(894), INT16_C(1463) }, + { -INT16_C(714), INT16_C(3903), -INT16_C(87), -INT16_C(1409), + INT16_C(4155), -INT16_C(3942), -INT16_C(3803), -INT16_C(3385) }, + { INT16_C(3632), -INT16_C(4465), INT16_C(4972), INT16_C(2353), + -INT16_C(554), -INT16_C(4589), -INT16_C(1038), -INT16_C(3827) } }, + { INT16_C(795), -INT16_C(3260), -INT16_C(2863)}, + { { -INT16_C(3293), INT16_C(1799), -INT16_C(2614), INT16_C(795), + INT16_C(154), INT16_C(1399), INT16_C(894), INT16_C(1463) }, + { -INT16_C(714), INT16_C(3903), -INT16_C(87), -INT16_C(3260), + INT16_C(4155), -INT16_C(3942), -INT16_C(3803), -INT16_C(3385) }, + { INT16_C(3632), -INT16_C(4465), INT16_C(4972), -INT16_C(2863), + -INT16_C(554), -INT16_C(4589), -INT16_C(1038), -INT16_C(3827) } } }, + { { { 
INT16_C(4874), INT16_C(1787), INT16_C(3061), INT16_C(1342), + -INT16_C(4591), -INT16_C(1421), INT16_C(1230), INT16_C(3115) }, + { INT16_C(2715), INT16_C(2686), -INT16_C(4982), -INT16_C(4201), + INT16_C(1874), -INT16_C(349), INT16_C(3167), INT16_C(1853) }, + { -INT16_C(599), INT16_C(3177), INT16_C(2976), INT16_C(440), + -INT16_C(4614), INT16_C(3956), -INT16_C(1913), INT16_C(3599) } }, + { INT16_C(2739), INT16_C(4689), -INT16_C(4528)}, + { { INT16_C(4874), INT16_C(1787), INT16_C(3061), INT16_C(1342), + INT16_C(2739), -INT16_C(1421), INT16_C(1230), INT16_C(3115) }, + { INT16_C(2715), INT16_C(2686), -INT16_C(4982), -INT16_C(4201), + INT16_C(4689), -INT16_C(349), INT16_C(3167), INT16_C(1853) }, + { -INT16_C(599), INT16_C(3177), INT16_C(2976), INT16_C(440), + -INT16_C(4528), INT16_C(3956), -INT16_C(1913), INT16_C(3599) } } }, + { { { INT16_C(2333), -INT16_C(4220), -INT16_C(156), INT16_C(2999), + -INT16_C(756), INT16_C(1397), INT16_C(3426), INT16_C(1946) }, + { -INT16_C(377), -INT16_C(773), -INT16_C(181), INT16_C(4655), + INT16_C(577), INT16_C(2152), INT16_C(1822), INT16_C(1387) }, + { INT16_C(1673), -INT16_C(1590), INT16_C(2338), -INT16_C(4018), + INT16_C(3127), INT16_C(1790), INT16_C(444), INT16_C(1571) } }, + { INT16_C(4725), -INT16_C(3008), INT16_C(3862)}, + { { INT16_C(2333), -INT16_C(4220), -INT16_C(156), INT16_C(2999), + -INT16_C(756), INT16_C(4725), INT16_C(3426), INT16_C(1946) }, + { -INT16_C(377), -INT16_C(773), -INT16_C(181), INT16_C(4655), + INT16_C(577), -INT16_C(3008), INT16_C(1822), INT16_C(1387) }, + { INT16_C(1673), -INT16_C(1590), INT16_C(2338), -INT16_C(4018), + INT16_C(3127), INT16_C(3862), INT16_C(444), INT16_C(1571) } } }, + { { { -INT16_C(1060), -INT16_C(75), INT16_C(3183), INT16_C(4253), + -INT16_C(1840), INT16_C(4704), -INT16_C(629), INT16_C(3253) }, + { -INT16_C(34), -INT16_C(4041), -INT16_C(1971), -INT16_C(895), + INT16_C(1253), -INT16_C(1347), -INT16_C(4546), INT16_C(3931) }, + { -INT16_C(149), -INT16_C(4817), -INT16_C(2596), -INT16_C(3858), + 
INT16_C(2456), -INT16_C(2586), -INT16_C(2889), INT16_C(134) } }, + { -INT16_C(4708), INT16_C(933), -INT16_C(3647)}, + { { -INT16_C(1060), -INT16_C(75), INT16_C(3183), INT16_C(4253), + -INT16_C(1840), INT16_C(4704), -INT16_C(4708), INT16_C(3253) }, + { -INT16_C(34), -INT16_C(4041), -INT16_C(1971), -INT16_C(895), + INT16_C(1253), -INT16_C(1347), INT16_C(933), INT16_C(3931) }, + { -INT16_C(149), -INT16_C(4817), -INT16_C(2596), -INT16_C(3858), + INT16_C(2456), -INT16_C(2586), -INT16_C(3647), INT16_C(134) } } }, + { { { INT16_C(3916), -INT16_C(4828), INT16_C(1552), INT16_C(4046), + -INT16_C(1748), INT16_C(4595), -INT16_C(3476), -INT16_C(3084) }, + { INT16_C(3756), -INT16_C(3946), INT16_C(4017), -INT16_C(676), + INT16_C(2556), INT16_C(1106), INT16_C(2577), INT16_C(782) }, + { -INT16_C(2936), -INT16_C(4886), -INT16_C(4608), -INT16_C(46), + INT16_C(2459), INT16_C(348), INT16_C(3567), INT16_C(1859) } }, + { -INT16_C(521), INT16_C(4002), -INT16_C(2389)}, + { { INT16_C(3916), -INT16_C(4828), INT16_C(1552), INT16_C(4046), + -INT16_C(1748), INT16_C(4595), -INT16_C(3476), -INT16_C(521) }, + { INT16_C(3756), -INT16_C(3946), INT16_C(4017), -INT16_C(676), + INT16_C(2556), INT16_C(1106), INT16_C(2577), INT16_C(4002) }, + { -INT16_C(2936), -INT16_C(4886), -INT16_C(4608), -INT16_C(46), + INT16_C(2459), INT16_C(348), INT16_C(3567), -INT16_C(2389) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8x3_t r, src, expected; + src.val[0] = simde_vld1q_s16(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s16(test_vec[i].src[1]); + src.val[2] = simde_vld1q_s16(test_vec[i].src[2]); + + SIMDE_CONSTIFY_8_(simde_vld3q_lane_s16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s16(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s16(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_s16(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); + 
simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i16x8(r.val[2], expected.val[2]); + } + + return 0; +} +/* Test for simde_vld3q_lane_s32: in each test_vec entry, src holds three 4-lane input vectors, buf the three scalars to load and r the expected vectors. The loop passes the entry index i as the lane index, so r equals src with lane i of val[k] replaced by buf[k]; the table has 4 entries, one per lane. NOTE(review): SIMDE_CONSTIFY_4_ presumably maps the runtime i onto a constant lane argument -- confirm against its definition. */ +static int +test_simde_vld3q_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t src[3][4]; + int32_t buf[3]; + int32_t r[3][4]; + } test_vec[] = { + { { { -INT32_C(274313), INT32_C(237242), -INT32_C(282304), INT32_C(204638) }, + { -INT32_C(80161), INT32_C(113801), INT32_C(414489), INT32_C(260239) }, + { INT32_C(164558), INT32_C(347080), INT32_C(456851), -INT32_C(190728) } }, + { -INT32_C(339723), INT32_C(150385), INT32_C(492259)}, + { { -INT32_C(339723), INT32_C(237242), -INT32_C(282304), INT32_C(204638) }, + { INT32_C(150385), INT32_C(113801), INT32_C(414489), INT32_C(260239) }, + { INT32_C(492259), INT32_C(347080), INT32_C(456851), -INT32_C(190728) } } }, + { { { -INT32_C(255470), -INT32_C(494394), -INT32_C(176874), -INT32_C(178296) }, + { -INT32_C(495850), INT32_C(428070), -INT32_C(145411), INT32_C(161255) }, + { INT32_C(169), -INT32_C(335586), INT32_C(344582), INT32_C(177465) } }, + { -INT32_C(76592), INT32_C(117895), -INT32_C(41174)}, + { { -INT32_C(255470), -INT32_C(76592), -INT32_C(176874), -INT32_C(178296) }, + { -INT32_C(495850), INT32_C(117895), -INT32_C(145411), INT32_C(161255) }, + { INT32_C(169), -INT32_C(41174), INT32_C(344582), INT32_C(177465) } } }, + { { { INT32_C(8460), -INT32_C(282882), INT32_C(334033), -INT32_C(249990) }, + { -INT32_C(299308), INT32_C(281874), -INT32_C(265183), INT32_C(45063) }, + { INT32_C(414608), INT32_C(338741), -INT32_C(193727), -INT32_C(266929) } }, + { INT32_C(94121), INT32_C(392450), INT32_C(344631)}, + { { INT32_C(8460), -INT32_C(282882), INT32_C(94121), -INT32_C(249990) }, + { -INT32_C(299308), INT32_C(281874), INT32_C(392450), INT32_C(45063) }, + { INT32_C(414608), INT32_C(338741), INT32_C(344631), -INT32_C(266929) } } }, + { { { -INT32_C(358516), -INT32_C(140139), INT32_C(6751), INT32_C(99765) }, + { INT32_C(264528), INT32_C(267803),
INT32_C(261941), INT32_C(401620) }, + { -INT32_C(63290), INT32_C(145380), INT32_C(381541), -INT32_C(1568) } }, + { -INT32_C(327720), INT32_C(173528), INT32_C(6300)}, + { { -INT32_C(358516), -INT32_C(140139), INT32_C(6751), -INT32_C(327720) }, + { INT32_C(264528), INT32_C(267803), INT32_C(261941), INT32_C(173528) }, + { -INT32_C(63290), INT32_C(145380), INT32_C(381541), INT32_C(6300) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4x3_t r, src, expected; + src.val[0] = simde_vld1q_s32(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s32(test_vec[i].src[1]); + src.val[2] = simde_vld1q_s32(test_vec[i].src[2]); + + SIMDE_CONSTIFY_4_(simde_vld3q_lane_s32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s32(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s32(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_s32(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i32x4(r.val[2], expected.val[2]); + } + + return 0; +} +/* Test for simde_vld3q_lane_s64: in each test_vec entry, src holds three 2-lane input vectors, buf the three scalars to load and r the expected vectors. The loop passes the entry index i as the lane index, so r equals src with lane i of val[k] replaced by buf[k]; the table has 2 entries, one per lane. NOTE(review): SIMDE_CONSTIFY_2_ presumably maps the runtime i onto a constant lane argument -- confirm against its definition. */ +static int +test_simde_vld3q_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t src[3][2]; + int64_t buf[3]; + int64_t r[3][2]; + } test_vec[] = { + { { { INT64_C(46945790), -INT64_C(17840845) }, + { INT64_C(28832924), INT64_C(18337985) }, + { INT64_C(31005442), INT64_C(4032821) } }, + { -INT64_C(17614845), INT64_C(34213434), INT64_C(47541946)}, + { { -INT64_C(17614845), -INT64_C(17840845) }, + { INT64_C(34213434), INT64_C(18337985) }, + { INT64_C(47541946), INT64_C(4032821) } } }, + { { { INT64_C(19812609), -INT64_C(9082080) }, + { -INT64_C(48294834), -INT64_C(38787891) }, + { -INT64_C(12827943), INT64_C(18336010) } }, + { -INT64_C(35893385), INT64_C(29217445), -INT64_C(9625128)}, + { { INT64_C(19812609), -INT64_C(35893385) }, + { -INT64_C(48294834), INT64_C(29217445) }, + { -INT64_C(12827943),
-INT64_C(9625128) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2x3_t r, src, expected; + src.val[0] = simde_vld1q_s64(test_vec[i].src[0]); + src.val[1] = simde_vld1q_s64(test_vec[i].src[1]); + src.val[2] = simde_vld1q_s64(test_vec[i].src[2]); + + SIMDE_CONSTIFY_2_(simde_vld3q_lane_s64, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_s64(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_s64(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_s64(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i64x2(r.val[2], expected.val[2]); + } + + return 0; +} +/* Test for simde_vld3q_lane_u8: in each test_vec entry, src holds three 16-lane input vectors, buf the three scalars to load and r the expected vectors. The loop passes the entry index i as the lane index, so r equals src with lane i of val[k] replaced by buf[k]; the table has 16 entries, one per lane. NOTE(review): SIMDE_CONSTIFY_16_ presumably maps the runtime i onto a constant lane argument -- confirm against its definition. */ +static int +test_simde_vld3q_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t src[3][16]; + uint8_t buf[3]; + uint8_t r[3][16]; + } test_vec[] = { + { { { UINT8_C(97), UINT8_C(34), UINT8_C(43), UINT8_C(79), UINT8_C(40), UINT8_C(28), UINT8_C(65), UINT8_C(74), + UINT8_C(9), UINT8_C(3), UINT8_C(91), UINT8_C(11), UINT8_C(64), UINT8_C(46), UINT8_C(68), UINT8_C(53) }, + { UINT8_C(42), UINT8_C(29), UINT8_C(58), UINT8_C(95), UINT8_C(50), UINT8_C(75), UINT8_C(26), UINT8_C(66), + UINT8_C(71), UINT8_C(45), UINT8_C(14), UINT8_C(13), UINT8_C(88), UINT8_C(66), UINT8_C(0), UINT8_C(95) }, + { UINT8_C(86), UINT8_C(19), UINT8_C(63), UINT8_C(41), UINT8_C(94), UINT8_C(31), UINT8_C(1), UINT8_C(67), + UINT8_C(85), UINT8_C(10), UINT8_C(21), UINT8_C(61), UINT8_C(99), UINT8_C(54), UINT8_C(88), UINT8_C(96) } }, + { UINT8_C(74), UINT8_C(52), UINT8_C(78)}, + { { UINT8_C(74), UINT8_C(34), UINT8_C(43), UINT8_C(79), UINT8_C(40), UINT8_C(28), UINT8_C(65), UINT8_C(74), + UINT8_C(9), UINT8_C(3), UINT8_C(91), UINT8_C(11), UINT8_C(64), UINT8_C(46), UINT8_C(68), UINT8_C(53) }, + { UINT8_C(52), UINT8_C(29), UINT8_C(58), UINT8_C(95), UINT8_C(50), UINT8_C(75), UINT8_C(26), UINT8_C(66), +
UINT8_C(71), UINT8_C(45), UINT8_C(14), UINT8_C(13), UINT8_C(88), UINT8_C(66), UINT8_C(0), UINT8_C(95) }, + { UINT8_C(78), UINT8_C(19), UINT8_C(63), UINT8_C(41), UINT8_C(94), UINT8_C(31), UINT8_C(1), UINT8_C(67), + UINT8_C(85), UINT8_C(10), UINT8_C(21), UINT8_C(61), UINT8_C(99), UINT8_C(54), UINT8_C(88), UINT8_C(96) } } }, + { { { UINT8_C(9), UINT8_C(1), UINT8_C(56), UINT8_C(58), UINT8_C(80), UINT8_C(56), UINT8_C(18), UINT8_C(35), + UINT8_C(9), UINT8_C(71), UINT8_C(43), UINT8_C(36), UINT8_C(32), UINT8_C(11), UINT8_C(67), UINT8_C(53) }, + { UINT8_C(6), UINT8_C(75), UINT8_C(57), UINT8_C(21), UINT8_C(70), UINT8_C(8), UINT8_C(57), UINT8_C(52), + UINT8_C(52), UINT8_C(0), UINT8_C(40), UINT8_C(87), UINT8_C(92), UINT8_C(18), UINT8_C(24), UINT8_C(32) }, + { UINT8_C(33), UINT8_C(9), UINT8_C(18), UINT8_C(75), UINT8_C(35), UINT8_C(76), UINT8_C(99), UINT8_C(66), + UINT8_C(40), UINT8_C(47), UINT8_C(2), UINT8_C(94), UINT8_C(66), UINT8_C(80), UINT8_C(73), UINT8_C(10) } }, + { UINT8_C(68), UINT8_C(58), UINT8_C(68)}, + { { UINT8_C(9), UINT8_C(68), UINT8_C(56), UINT8_C(58), UINT8_C(80), UINT8_C(56), UINT8_C(18), UINT8_C(35), + UINT8_C(9), UINT8_C(71), UINT8_C(43), UINT8_C(36), UINT8_C(32), UINT8_C(11), UINT8_C(67), UINT8_C(53) }, + { UINT8_C(6), UINT8_C(58), UINT8_C(57), UINT8_C(21), UINT8_C(70), UINT8_C(8), UINT8_C(57), UINT8_C(52), + UINT8_C(52), UINT8_C(0), UINT8_C(40), UINT8_C(87), UINT8_C(92), UINT8_C(18), UINT8_C(24), UINT8_C(32) }, + { UINT8_C(33), UINT8_C(68), UINT8_C(18), UINT8_C(75), UINT8_C(35), UINT8_C(76), UINT8_C(99), UINT8_C(66), + UINT8_C(40), UINT8_C(47), UINT8_C(2), UINT8_C(94), UINT8_C(66), UINT8_C(80), UINT8_C(73), UINT8_C(10) } } }, + { { { UINT8_C(50), UINT8_C(10), UINT8_C(1), UINT8_C(90), UINT8_C(50), UINT8_C(47), UINT8_C(86), UINT8_C(81), + UINT8_C(61), UINT8_C(7), UINT8_C(95), UINT8_C(60), UINT8_C(48), UINT8_C(8), UINT8_C(31), UINT8_C(91) }, + { UINT8_C(73), UINT8_C(83), UINT8_C(76), UINT8_C(91), UINT8_C(19), UINT8_C(85), UINT8_C(38), UINT8_C(82), + 
UINT8_C(27), UINT8_C(82), UINT8_C(46), UINT8_C(65), UINT8_C(68), UINT8_C(68), UINT8_C(33), UINT8_C(1) }, + { UINT8_C(47), UINT8_C(15), UINT8_C(55), UINT8_C(34), UINT8_C(74), UINT8_C(43), UINT8_C(84), UINT8_C(10), + UINT8_C(28), UINT8_C(68), UINT8_C(63), UINT8_C(97), UINT8_C(77), UINT8_C(44), UINT8_C(2), UINT8_C(12) } }, + { UINT8_C(36), UINT8_C(31), UINT8_C(75)}, + { { UINT8_C(50), UINT8_C(10), UINT8_C(36), UINT8_C(90), UINT8_C(50), UINT8_C(47), UINT8_C(86), UINT8_C(81), + UINT8_C(61), UINT8_C(7), UINT8_C(95), UINT8_C(60), UINT8_C(48), UINT8_C(8), UINT8_C(31), UINT8_C(91) }, + { UINT8_C(73), UINT8_C(83), UINT8_C(31), UINT8_C(91), UINT8_C(19), UINT8_C(85), UINT8_C(38), UINT8_C(82), + UINT8_C(27), UINT8_C(82), UINT8_C(46), UINT8_C(65), UINT8_C(68), UINT8_C(68), UINT8_C(33), UINT8_C(1) }, + { UINT8_C(47), UINT8_C(15), UINT8_C(75), UINT8_C(34), UINT8_C(74), UINT8_C(43), UINT8_C(84), UINT8_C(10), + UINT8_C(28), UINT8_C(68), UINT8_C(63), UINT8_C(97), UINT8_C(77), UINT8_C(44), UINT8_C(2), UINT8_C(12) } } }, + { { { UINT8_C(47), UINT8_C(52), UINT8_C(57), UINT8_C(40), UINT8_C(37), UINT8_C(25), UINT8_C(31), UINT8_C(34), + UINT8_C(59), UINT8_C(77), UINT8_C(97), UINT8_C(42), UINT8_C(81), UINT8_C(31), UINT8_C(72), UINT8_C(79) }, + { UINT8_C(34), UINT8_C(27), UINT8_C(19), UINT8_C(64), UINT8_C(15), UINT8_C(17), UINT8_C(15), UINT8_C(47), + UINT8_C(63), UINT8_C(27), UINT8_C(7), UINT8_C(9), UINT8_C(63), UINT8_C(44), UINT8_C(81), UINT8_C(91) }, + { UINT8_C(54), UINT8_C(66), UINT8_C(39), UINT8_C(9), UINT8_C(19), UINT8_C(4), UINT8_C(56), UINT8_C(13), + UINT8_C(45), UINT8_C(91), UINT8_C(74), UINT8_C(85), UINT8_C(70), UINT8_C(79), UINT8_C(65), UINT8_C(66) } }, + { UINT8_C(92), UINT8_C(78), UINT8_C(33)}, + { { UINT8_C(47), UINT8_C(52), UINT8_C(57), UINT8_C(92), UINT8_C(37), UINT8_C(25), UINT8_C(31), UINT8_C(34), + UINT8_C(59), UINT8_C(77), UINT8_C(97), UINT8_C(42), UINT8_C(81), UINT8_C(31), UINT8_C(72), UINT8_C(79) }, + { UINT8_C(34), UINT8_C(27), UINT8_C(19), UINT8_C(78), UINT8_C(15), 
UINT8_C(17), UINT8_C(15), UINT8_C(47), + UINT8_C(63), UINT8_C(27), UINT8_C(7), UINT8_C(9), UINT8_C(63), UINT8_C(44), UINT8_C(81), UINT8_C(91) }, + { UINT8_C(54), UINT8_C(66), UINT8_C(39), UINT8_C(33), UINT8_C(19), UINT8_C(4), UINT8_C(56), UINT8_C(13), + UINT8_C(45), UINT8_C(91), UINT8_C(74), UINT8_C(85), UINT8_C(70), UINT8_C(79), UINT8_C(65), UINT8_C(66) } } }, + { { { UINT8_C(41), UINT8_C(0), UINT8_C(13), UINT8_C(21), UINT8_C(14), UINT8_C(99), UINT8_C(7), UINT8_C(41), + UINT8_C(65), UINT8_C(42), UINT8_C(70), UINT8_C(7), UINT8_C(31), UINT8_C(6), UINT8_C(57), UINT8_C(25) }, + { UINT8_C(89), UINT8_C(11), UINT8_C(40), UINT8_C(22), UINT8_C(75), UINT8_C(80), UINT8_C(72), UINT8_C(0), + UINT8_C(12), UINT8_C(0), UINT8_C(29), UINT8_C(77), UINT8_C(83), UINT8_C(2), UINT8_C(22), UINT8_C(53) }, + { UINT8_C(82), UINT8_C(8), UINT8_C(33), UINT8_C(81), UINT8_C(96), UINT8_C(37), UINT8_C(91), UINT8_C(76), + UINT8_C(39), UINT8_C(58), UINT8_C(73), UINT8_C(97), UINT8_C(11), UINT8_C(15), UINT8_C(21), UINT8_C(99) } }, + { UINT8_C(96), UINT8_C(62), UINT8_C(14)}, + { { UINT8_C(41), UINT8_C(0), UINT8_C(13), UINT8_C(21), UINT8_C(96), UINT8_C(99), UINT8_C(7), UINT8_C(41), + UINT8_C(65), UINT8_C(42), UINT8_C(70), UINT8_C(7), UINT8_C(31), UINT8_C(6), UINT8_C(57), UINT8_C(25) }, + { UINT8_C(89), UINT8_C(11), UINT8_C(40), UINT8_C(22), UINT8_C(62), UINT8_C(80), UINT8_C(72), UINT8_C(0), + UINT8_C(12), UINT8_C(0), UINT8_C(29), UINT8_C(77), UINT8_C(83), UINT8_C(2), UINT8_C(22), UINT8_C(53) }, + { UINT8_C(82), UINT8_C(8), UINT8_C(33), UINT8_C(81), UINT8_C(14), UINT8_C(37), UINT8_C(91), UINT8_C(76), + UINT8_C(39), UINT8_C(58), UINT8_C(73), UINT8_C(97), UINT8_C(11), UINT8_C(15), UINT8_C(21), UINT8_C(99) } } }, + { { { UINT8_C(20), UINT8_C(41), UINT8_C(29), UINT8_C(44), UINT8_C(67), UINT8_C(37), UINT8_C(87), UINT8_C(42), + UINT8_C(73), UINT8_C(62), UINT8_C(94), UINT8_C(35), UINT8_C(12), UINT8_C(53), UINT8_C(34), UINT8_C(89) }, + { UINT8_C(64), UINT8_C(49), UINT8_C(36), UINT8_C(14), UINT8_C(32), 
UINT8_C(10), UINT8_C(2), UINT8_C(82), + UINT8_C(64), UINT8_C(73), UINT8_C(51), UINT8_C(96), UINT8_C(23), UINT8_C(75), UINT8_C(27), UINT8_C(25) }, + { UINT8_C(24), UINT8_C(87), UINT8_C(35), UINT8_C(68), UINT8_C(68), UINT8_C(9), UINT8_C(10), UINT8_C(17), + UINT8_C(20), UINT8_C(88), UINT8_C(79), UINT8_C(13), UINT8_C(53), UINT8_C(99), UINT8_C(83), UINT8_C(9) } }, + { UINT8_C(87), UINT8_C(65), UINT8_C(37)}, + { { UINT8_C(20), UINT8_C(41), UINT8_C(29), UINT8_C(44), UINT8_C(67), UINT8_C(87), UINT8_C(87), UINT8_C(42), + UINT8_C(73), UINT8_C(62), UINT8_C(94), UINT8_C(35), UINT8_C(12), UINT8_C(53), UINT8_C(34), UINT8_C(89) }, + { UINT8_C(64), UINT8_C(49), UINT8_C(36), UINT8_C(14), UINT8_C(32), UINT8_C(65), UINT8_C(2), UINT8_C(82), + UINT8_C(64), UINT8_C(73), UINT8_C(51), UINT8_C(96), UINT8_C(23), UINT8_C(75), UINT8_C(27), UINT8_C(25) }, + { UINT8_C(24), UINT8_C(87), UINT8_C(35), UINT8_C(68), UINT8_C(68), UINT8_C(37), UINT8_C(10), UINT8_C(17), + UINT8_C(20), UINT8_C(88), UINT8_C(79), UINT8_C(13), UINT8_C(53), UINT8_C(99), UINT8_C(83), UINT8_C(9) } } }, + { { { UINT8_C(98), UINT8_C(75), UINT8_C(1), UINT8_C(3), UINT8_C(9), UINT8_C(61), UINT8_C(91), UINT8_C(69), + UINT8_C(75), UINT8_C(22), UINT8_C(77), UINT8_C(29), UINT8_C(2), UINT8_C(8), UINT8_C(96), UINT8_C(52) }, + { UINT8_C(35), UINT8_C(65), UINT8_C(32), UINT8_C(16), UINT8_C(62), UINT8_C(3), UINT8_C(91), UINT8_C(38), + UINT8_C(42), UINT8_C(23), UINT8_C(81), UINT8_C(61), UINT8_C(28), UINT8_C(1), UINT8_C(5), UINT8_C(84) }, + { UINT8_C(45), UINT8_C(87), UINT8_C(52), UINT8_C(74), UINT8_C(83), UINT8_C(48), UINT8_C(17), UINT8_C(96), + UINT8_C(48), UINT8_C(82), UINT8_C(56), UINT8_C(23), UINT8_C(23), UINT8_C(52), UINT8_C(69), UINT8_C(57) } }, + { UINT8_C(89), UINT8_C(29), UINT8_C(27)}, + { { UINT8_C(98), UINT8_C(75), UINT8_C(1), UINT8_C(3), UINT8_C(9), UINT8_C(61), UINT8_C(89), UINT8_C(69), + UINT8_C(75), UINT8_C(22), UINT8_C(77), UINT8_C(29), UINT8_C(2), UINT8_C(8), UINT8_C(96), UINT8_C(52) }, + { UINT8_C(35), UINT8_C(65), 
UINT8_C(32), UINT8_C(16), UINT8_C(62), UINT8_C(3), UINT8_C(29), UINT8_C(38), + UINT8_C(42), UINT8_C(23), UINT8_C(81), UINT8_C(61), UINT8_C(28), UINT8_C(1), UINT8_C(5), UINT8_C(84) }, + { UINT8_C(45), UINT8_C(87), UINT8_C(52), UINT8_C(74), UINT8_C(83), UINT8_C(48), UINT8_C(27), UINT8_C(96), + UINT8_C(48), UINT8_C(82), UINT8_C(56), UINT8_C(23), UINT8_C(23), UINT8_C(52), UINT8_C(69), UINT8_C(57) } } }, + { { { UINT8_C(46), UINT8_C(86), UINT8_C(19), UINT8_C(68), UINT8_C(15), UINT8_C(80), UINT8_C(55), UINT8_C(26), + UINT8_C(13), UINT8_C(91), UINT8_C(5), UINT8_C(36), UINT8_C(91), UINT8_C(27), UINT8_C(80), UINT8_C(74) }, + { UINT8_C(88), UINT8_C(47), UINT8_C(36), UINT8_C(33), UINT8_C(37), UINT8_C(91), UINT8_C(41), UINT8_C(63), + UINT8_C(23), UINT8_C(98), UINT8_C(80), UINT8_C(12), UINT8_C(64), UINT8_C(50), UINT8_C(92), UINT8_C(11) }, + { UINT8_C(99), UINT8_C(57), UINT8_C(22), UINT8_C(30), UINT8_C(57), UINT8_C(84), UINT8_C(43), UINT8_C(39), + UINT8_C(54), UINT8_C(15), UINT8_C(21), UINT8_C(38), UINT8_C(13), UINT8_C(93), UINT8_C(96), UINT8_C(10) } }, + { UINT8_C(75), UINT8_C(42), UINT8_C(8)}, + { { UINT8_C(46), UINT8_C(86), UINT8_C(19), UINT8_C(68), UINT8_C(15), UINT8_C(80), UINT8_C(55), UINT8_C(75), + UINT8_C(13), UINT8_C(91), UINT8_C(5), UINT8_C(36), UINT8_C(91), UINT8_C(27), UINT8_C(80), UINT8_C(74) }, + { UINT8_C(88), UINT8_C(47), UINT8_C(36), UINT8_C(33), UINT8_C(37), UINT8_C(91), UINT8_C(41), UINT8_C(42), + UINT8_C(23), UINT8_C(98), UINT8_C(80), UINT8_C(12), UINT8_C(64), UINT8_C(50), UINT8_C(92), UINT8_C(11) }, + { UINT8_C(99), UINT8_C(57), UINT8_C(22), UINT8_C(30), UINT8_C(57), UINT8_C(84), UINT8_C(43), UINT8_C(8), + UINT8_C(54), UINT8_C(15), UINT8_C(21), UINT8_C(38), UINT8_C(13), UINT8_C(93), UINT8_C(96), UINT8_C(10) } } }, + { { { UINT8_C(24), UINT8_C(21), UINT8_C(44), UINT8_C(24), UINT8_C(42), UINT8_C(32), UINT8_C(52), UINT8_C(88), + UINT8_C(75), UINT8_C(41), UINT8_C(31), UINT8_C(71), UINT8_C(42), UINT8_C(21), UINT8_C(95), UINT8_C(29) }, + { UINT8_C(0), UINT8_C(81), 
UINT8_C(30), UINT8_C(11), UINT8_C(14), UINT8_C(31), UINT8_C(47), UINT8_C(21), + UINT8_C(76), UINT8_C(47), UINT8_C(72), UINT8_C(10), UINT8_C(59), UINT8_C(86), UINT8_C(58), UINT8_C(86) }, + { UINT8_C(90), UINT8_C(32), UINT8_C(67), UINT8_C(12), UINT8_C(21), UINT8_C(76), UINT8_C(45), UINT8_C(14), + UINT8_C(82), UINT8_C(96), UINT8_C(68), UINT8_C(15), UINT8_C(37), UINT8_C(65), UINT8_C(18), UINT8_C(49) } }, + { UINT8_C(84), UINT8_C(83), UINT8_C(21)}, + { { UINT8_C(24), UINT8_C(21), UINT8_C(44), UINT8_C(24), UINT8_C(42), UINT8_C(32), UINT8_C(52), UINT8_C(88), + UINT8_C(84), UINT8_C(41), UINT8_C(31), UINT8_C(71), UINT8_C(42), UINT8_C(21), UINT8_C(95), UINT8_C(29) }, + { UINT8_C(0), UINT8_C(81), UINT8_C(30), UINT8_C(11), UINT8_C(14), UINT8_C(31), UINT8_C(47), UINT8_C(21), + UINT8_C(83), UINT8_C(47), UINT8_C(72), UINT8_C(10), UINT8_C(59), UINT8_C(86), UINT8_C(58), UINT8_C(86) }, + { UINT8_C(90), UINT8_C(32), UINT8_C(67), UINT8_C(12), UINT8_C(21), UINT8_C(76), UINT8_C(45), UINT8_C(14), + UINT8_C(21), UINT8_C(96), UINT8_C(68), UINT8_C(15), UINT8_C(37), UINT8_C(65), UINT8_C(18), UINT8_C(49) } } }, + { { { UINT8_C(22), UINT8_C(62), UINT8_C(79), UINT8_C(5), UINT8_C(54), UINT8_C(45), UINT8_C(83), UINT8_C(98), + UINT8_C(60), UINT8_C(61), UINT8_C(81), UINT8_C(62), UINT8_C(94), UINT8_C(14), UINT8_C(98), UINT8_C(22) }, + { UINT8_C(78), UINT8_C(87), UINT8_C(48), UINT8_C(29), UINT8_C(52), UINT8_C(86), UINT8_C(71), UINT8_C(13), + UINT8_C(20), UINT8_C(11), UINT8_C(89), UINT8_C(3), UINT8_C(5), UINT8_C(93), UINT8_C(48), UINT8_C(27) }, + { UINT8_C(24), UINT8_C(21), UINT8_C(99), UINT8_C(53), UINT8_C(65), UINT8_C(75), UINT8_C(5), UINT8_C(72), + UINT8_C(8), UINT8_C(35), UINT8_C(7), UINT8_C(49), UINT8_C(6), UINT8_C(75), UINT8_C(51), UINT8_C(60) } }, + { UINT8_C(20), UINT8_C(32), UINT8_C(53)}, + { { UINT8_C(22), UINT8_C(62), UINT8_C(79), UINT8_C(5), UINT8_C(54), UINT8_C(45), UINT8_C(83), UINT8_C(98), + UINT8_C(60), UINT8_C(20), UINT8_C(81), UINT8_C(62), UINT8_C(94), UINT8_C(14), UINT8_C(98), 
UINT8_C(22) }, + { UINT8_C(78), UINT8_C(87), UINT8_C(48), UINT8_C(29), UINT8_C(52), UINT8_C(86), UINT8_C(71), UINT8_C(13), + UINT8_C(20), UINT8_C(32), UINT8_C(89), UINT8_C(3), UINT8_C(5), UINT8_C(93), UINT8_C(48), UINT8_C(27) }, + { UINT8_C(24), UINT8_C(21), UINT8_C(99), UINT8_C(53), UINT8_C(65), UINT8_C(75), UINT8_C(5), UINT8_C(72), + UINT8_C(8), UINT8_C(53), UINT8_C(7), UINT8_C(49), UINT8_C(6), UINT8_C(75), UINT8_C(51), UINT8_C(60) } } }, + { { { UINT8_C(17), UINT8_C(92), UINT8_C(0), UINT8_C(91), UINT8_C(29), UINT8_C(86), UINT8_C(77), UINT8_C(36), + UINT8_C(8), UINT8_C(86), UINT8_C(69), UINT8_C(6), UINT8_C(35), UINT8_C(48), UINT8_C(92), UINT8_C(63) }, + { UINT8_C(69), UINT8_C(16), UINT8_C(3), UINT8_C(40), UINT8_C(43), UINT8_C(9), UINT8_C(40), UINT8_C(29), + UINT8_C(43), UINT8_C(7), UINT8_C(58), UINT8_C(77), UINT8_C(6), UINT8_C(96), UINT8_C(81), UINT8_C(97) }, + { UINT8_C(45), UINT8_C(34), UINT8_C(19), UINT8_C(92), UINT8_C(4), UINT8_C(72), UINT8_C(78), UINT8_C(30), + UINT8_C(15), UINT8_C(12), UINT8_C(37), UINT8_C(53), UINT8_C(85), UINT8_C(88), UINT8_C(0), UINT8_C(40) } }, + { UINT8_C(63), UINT8_C(11), UINT8_C(19)}, + { { UINT8_C(17), UINT8_C(92), UINT8_C(0), UINT8_C(91), UINT8_C(29), UINT8_C(86), UINT8_C(77), UINT8_C(36), + UINT8_C(8), UINT8_C(86), UINT8_C(63), UINT8_C(6), UINT8_C(35), UINT8_C(48), UINT8_C(92), UINT8_C(63) }, + { UINT8_C(69), UINT8_C(16), UINT8_C(3), UINT8_C(40), UINT8_C(43), UINT8_C(9), UINT8_C(40), UINT8_C(29), + UINT8_C(43), UINT8_C(7), UINT8_C(11), UINT8_C(77), UINT8_C(6), UINT8_C(96), UINT8_C(81), UINT8_C(97) }, + { UINT8_C(45), UINT8_C(34), UINT8_C(19), UINT8_C(92), UINT8_C(4), UINT8_C(72), UINT8_C(78), UINT8_C(30), + UINT8_C(15), UINT8_C(12), UINT8_C(19), UINT8_C(53), UINT8_C(85), UINT8_C(88), UINT8_C(0), UINT8_C(40) } } }, + { { { UINT8_C(97), UINT8_C(65), UINT8_C(44), UINT8_C(86), UINT8_C(8), UINT8_C(17), UINT8_C(68), UINT8_C(90), + UINT8_C(28), UINT8_C(50), UINT8_C(29), UINT8_C(7), UINT8_C(32), UINT8_C(0), UINT8_C(49), UINT8_C(9) }, + { 
UINT8_C(41), UINT8_C(53), UINT8_C(62), UINT8_C(71), UINT8_C(16), UINT8_C(15), UINT8_C(13), UINT8_C(88), + UINT8_C(68), UINT8_C(9), UINT8_C(29), UINT8_C(6), UINT8_C(22), UINT8_C(32), UINT8_C(28), UINT8_C(73) }, + { UINT8_C(90), UINT8_C(12), UINT8_C(95), UINT8_C(43), UINT8_C(28), UINT8_C(11), UINT8_C(77), UINT8_C(24), + UINT8_C(28), UINT8_C(66), UINT8_C(7), UINT8_C(48), UINT8_C(73), UINT8_C(66), UINT8_C(41), UINT8_C(26) } }, + { UINT8_C(54), UINT8_C(15), UINT8_C(78)}, + { { UINT8_C(97), UINT8_C(65), UINT8_C(44), UINT8_C(86), UINT8_C(8), UINT8_C(17), UINT8_C(68), UINT8_C(90), + UINT8_C(28), UINT8_C(50), UINT8_C(29), UINT8_C(54), UINT8_C(32), UINT8_C(0), UINT8_C(49), UINT8_C(9) }, + { UINT8_C(41), UINT8_C(53), UINT8_C(62), UINT8_C(71), UINT8_C(16), UINT8_C(15), UINT8_C(13), UINT8_C(88), + UINT8_C(68), UINT8_C(9), UINT8_C(29), UINT8_C(15), UINT8_C(22), UINT8_C(32), UINT8_C(28), UINT8_C(73) }, + { UINT8_C(90), UINT8_C(12), UINT8_C(95), UINT8_C(43), UINT8_C(28), UINT8_C(11), UINT8_C(77), UINT8_C(24), + UINT8_C(28), UINT8_C(66), UINT8_C(7), UINT8_C(78), UINT8_C(73), UINT8_C(66), UINT8_C(41), UINT8_C(26) } } }, + { { { UINT8_C(68), UINT8_C(0), UINT8_C(37), UINT8_C(9), UINT8_C(36), UINT8_C(20), UINT8_C(51), UINT8_C(43), + UINT8_C(38), UINT8_C(58), UINT8_C(45), UINT8_C(25), UINT8_C(28), UINT8_C(58), UINT8_C(54), UINT8_C(47) }, + { UINT8_C(95), UINT8_C(96), UINT8_C(83), UINT8_C(40), UINT8_C(74), UINT8_C(12), UINT8_C(25), UINT8_C(32), + UINT8_C(23), UINT8_C(42), UINT8_C(57), UINT8_C(39), UINT8_C(36), UINT8_C(42), UINT8_C(21), UINT8_C(54) }, + { UINT8_C(49), UINT8_C(37), UINT8_C(30), UINT8_C(78), UINT8_C(55), UINT8_C(40), UINT8_C(46), UINT8_C(82), + UINT8_C(55), UINT8_C(76), UINT8_C(19), UINT8_C(6), UINT8_C(60), UINT8_C(52), UINT8_C(70), UINT8_C(78) } }, + { UINT8_C(39), UINT8_C(16), UINT8_C(18)}, + { { UINT8_C(68), UINT8_C(0), UINT8_C(37), UINT8_C(9), UINT8_C(36), UINT8_C(20), UINT8_C(51), UINT8_C(43), + UINT8_C(38), UINT8_C(58), UINT8_C(45), UINT8_C(25), UINT8_C(39), 
UINT8_C(58), UINT8_C(54), UINT8_C(47) }, + { UINT8_C(95), UINT8_C(96), UINT8_C(83), UINT8_C(40), UINT8_C(74), UINT8_C(12), UINT8_C(25), UINT8_C(32), + UINT8_C(23), UINT8_C(42), UINT8_C(57), UINT8_C(39), UINT8_C(16), UINT8_C(42), UINT8_C(21), UINT8_C(54) }, + { UINT8_C(49), UINT8_C(37), UINT8_C(30), UINT8_C(78), UINT8_C(55), UINT8_C(40), UINT8_C(46), UINT8_C(82), + UINT8_C(55), UINT8_C(76), UINT8_C(19), UINT8_C(6), UINT8_C(18), UINT8_C(52), UINT8_C(70), UINT8_C(78) } } }, + { { { UINT8_C(20), UINT8_C(75), UINT8_C(35), UINT8_C(24), UINT8_C(60), UINT8_C(21), UINT8_C(67), UINT8_C(1), + UINT8_C(23), UINT8_C(57), UINT8_C(40), UINT8_C(31), UINT8_C(71), UINT8_C(21), UINT8_C(41), UINT8_C(71) }, + { UINT8_C(3), UINT8_C(21), UINT8_C(89), UINT8_C(29), UINT8_C(51), UINT8_C(99), UINT8_C(90), UINT8_C(35), + UINT8_C(30), UINT8_C(34), UINT8_C(67), UINT8_C(84), UINT8_C(79), UINT8_C(58), UINT8_C(39), UINT8_C(41) }, + { UINT8_C(0), UINT8_C(68), UINT8_C(73), UINT8_C(59), UINT8_C(44), UINT8_C(83), UINT8_C(41), UINT8_C(72), + UINT8_C(54), UINT8_C(29), UINT8_C(52), UINT8_C(51), UINT8_C(25), UINT8_C(25), UINT8_C(13), UINT8_C(15) } }, + { UINT8_C(13), UINT8_C(84), UINT8_C(69)}, + { { UINT8_C(20), UINT8_C(75), UINT8_C(35), UINT8_C(24), UINT8_C(60), UINT8_C(21), UINT8_C(67), UINT8_C(1), + UINT8_C(23), UINT8_C(57), UINT8_C(40), UINT8_C(31), UINT8_C(71), UINT8_C(13), UINT8_C(41), UINT8_C(71) }, + { UINT8_C(3), UINT8_C(21), UINT8_C(89), UINT8_C(29), UINT8_C(51), UINT8_C(99), UINT8_C(90), UINT8_C(35), + UINT8_C(30), UINT8_C(34), UINT8_C(67), UINT8_C(84), UINT8_C(79), UINT8_C(84), UINT8_C(39), UINT8_C(41) }, + { UINT8_C(0), UINT8_C(68), UINT8_C(73), UINT8_C(59), UINT8_C(44), UINT8_C(83), UINT8_C(41), UINT8_C(72), + UINT8_C(54), UINT8_C(29), UINT8_C(52), UINT8_C(51), UINT8_C(25), UINT8_C(69), UINT8_C(13), UINT8_C(15) } } }, + { { { UINT8_C(78), UINT8_C(80), UINT8_C(8), UINT8_C(85), UINT8_C(17), UINT8_C(53), UINT8_C(88), UINT8_C(87), + UINT8_C(32), UINT8_C(29), UINT8_C(69), UINT8_C(78), UINT8_C(28), 
UINT8_C(38), UINT8_C(52), UINT8_C(24) }, + { UINT8_C(64), UINT8_C(96), UINT8_C(34), UINT8_C(25), UINT8_C(46), UINT8_C(80), UINT8_C(65), UINT8_C(84), + UINT8_C(71), UINT8_C(36), UINT8_C(9), UINT8_C(67), UINT8_C(75), UINT8_C(57), UINT8_C(24), UINT8_C(69) }, + { UINT8_C(77), UINT8_C(59), UINT8_C(66), UINT8_C(82), UINT8_C(84), UINT8_C(12), UINT8_C(12), UINT8_C(87), + UINT8_C(76), UINT8_C(40), UINT8_C(86), UINT8_C(5), UINT8_C(88), UINT8_C(47), UINT8_C(15), UINT8_C(68) } }, + { UINT8_C(57), UINT8_C(4), UINT8_C(6)}, + { { UINT8_C(78), UINT8_C(80), UINT8_C(8), UINT8_C(85), UINT8_C(17), UINT8_C(53), UINT8_C(88), UINT8_C(87), + UINT8_C(32), UINT8_C(29), UINT8_C(69), UINT8_C(78), UINT8_C(28), UINT8_C(38), UINT8_C(57), UINT8_C(24) }, + { UINT8_C(64), UINT8_C(96), UINT8_C(34), UINT8_C(25), UINT8_C(46), UINT8_C(80), UINT8_C(65), UINT8_C(84), + UINT8_C(71), UINT8_C(36), UINT8_C(9), UINT8_C(67), UINT8_C(75), UINT8_C(57), UINT8_C(4), UINT8_C(69) }, + { UINT8_C(77), UINT8_C(59), UINT8_C(66), UINT8_C(82), UINT8_C(84), UINT8_C(12), UINT8_C(12), UINT8_C(87), + UINT8_C(76), UINT8_C(40), UINT8_C(86), UINT8_C(5), UINT8_C(88), UINT8_C(47), UINT8_C(6), UINT8_C(68) } } }, + { { { UINT8_C(10), UINT8_C(53), UINT8_C(42), UINT8_C(50), UINT8_C(80), UINT8_C(86), UINT8_C(65), UINT8_C(9), + UINT8_C(64), UINT8_C(65), UINT8_C(83), UINT8_C(30), UINT8_C(10), UINT8_C(44), UINT8_C(49), UINT8_C(37) }, + { UINT8_C(31), UINT8_C(51), UINT8_C(22), UINT8_C(60), UINT8_C(49), UINT8_C(93), UINT8_C(32), UINT8_C(99), + UINT8_C(7), UINT8_C(29), UINT8_C(87), UINT8_C(97), UINT8_C(99), UINT8_C(4), UINT8_C(60), UINT8_C(5) }, + { UINT8_C(4), UINT8_C(70), UINT8_C(12), UINT8_C(81), UINT8_C(94), UINT8_C(77), UINT8_C(86), UINT8_C(97), + UINT8_C(2), UINT8_C(78), UINT8_C(86), UINT8_C(4), UINT8_C(45), UINT8_C(33), UINT8_C(66), UINT8_C(9) } }, + { UINT8_C(6), UINT8_C(72), UINT8_C(66)}, + { { UINT8_C(10), UINT8_C(53), UINT8_C(42), UINT8_C(50), UINT8_C(80), UINT8_C(86), UINT8_C(65), UINT8_C(9), + UINT8_C(64), UINT8_C(65), 
UINT8_C(83), UINT8_C(30), UINT8_C(10), UINT8_C(44), UINT8_C(49), UINT8_C(6) }, + { UINT8_C(31), UINT8_C(51), UINT8_C(22), UINT8_C(60), UINT8_C(49), UINT8_C(93), UINT8_C(32), UINT8_C(99), + UINT8_C(7), UINT8_C(29), UINT8_C(87), UINT8_C(97), UINT8_C(99), UINT8_C(4), UINT8_C(60), UINT8_C(72) }, + { UINT8_C(4), UINT8_C(70), UINT8_C(12), UINT8_C(81), UINT8_C(94), UINT8_C(77), UINT8_C(86), UINT8_C(97), + UINT8_C(2), UINT8_C(78), UINT8_C(86), UINT8_C(4), UINT8_C(45), UINT8_C(33), UINT8_C(66), UINT8_C(66) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16x3_t r, src, expected; + src.val[0] = simde_vld1q_u8(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u8(test_vec[i].src[1]); + src.val[2] = simde_vld1q_u8(test_vec[i].src[2]); + + SIMDE_CONSTIFY_16_(simde_vld3q_lane_u8, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u8(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u8(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_u8(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u8x16(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t src[3][8]; + uint16_t buf[3]; + uint16_t r[3][8]; + } test_vec[] = { + { { { UINT16_C(2878), UINT16_C(9122), UINT16_C(7556), UINT16_C(7857), + UINT16_C(958), UINT16_C(7812), UINT16_C(2259), UINT16_C(582) }, + { UINT16_C(5578), UINT16_C(9333), UINT16_C(1655), UINT16_C(9009), + UINT16_C(596), UINT16_C(8223), UINT16_C(5654), UINT16_C(3297) }, + { UINT16_C(5817), UINT16_C(7331), UINT16_C(8798), UINT16_C(172), + UINT16_C(1106), UINT16_C(371), UINT16_C(9285), UINT16_C(5940) } }, + { UINT16_C(9267), UINT16_C(2616), UINT16_C(7110)}, + { { UINT16_C(9267), UINT16_C(9122), UINT16_C(7556), UINT16_C(7857), + UINT16_C(958), 
UINT16_C(7812), UINT16_C(2259), UINT16_C(582) }, + { UINT16_C(2616), UINT16_C(9333), UINT16_C(1655), UINT16_C(9009), + UINT16_C(596), UINT16_C(8223), UINT16_C(5654), UINT16_C(3297) }, + { UINT16_C(7110), UINT16_C(7331), UINT16_C(8798), UINT16_C(172), + UINT16_C(1106), UINT16_C(371), UINT16_C(9285), UINT16_C(5940) } } }, + { { { UINT16_C(2435), UINT16_C(4626), UINT16_C(4142), UINT16_C(3387), + UINT16_C(6479), UINT16_C(7643), UINT16_C(5193), UINT16_C(3942) }, + { UINT16_C(3016), UINT16_C(1407), UINT16_C(8271), UINT16_C(4950), + UINT16_C(1775), UINT16_C(4681), UINT16_C(737), UINT16_C(7429) }, + { UINT16_C(7279), UINT16_C(9658), UINT16_C(8142), UINT16_C(2112), + UINT16_C(4555), UINT16_C(469), UINT16_C(1007), UINT16_C(2148) } }, + { UINT16_C(4650), UINT16_C(9800), UINT16_C(2863)}, + { { UINT16_C(2435), UINT16_C(4650), UINT16_C(4142), UINT16_C(3387), + UINT16_C(6479), UINT16_C(7643), UINT16_C(5193), UINT16_C(3942) }, + { UINT16_C(3016), UINT16_C(9800), UINT16_C(8271), UINT16_C(4950), + UINT16_C(1775), UINT16_C(4681), UINT16_C(737), UINT16_C(7429) }, + { UINT16_C(7279), UINT16_C(2863), UINT16_C(8142), UINT16_C(2112), + UINT16_C(4555), UINT16_C(469), UINT16_C(1007), UINT16_C(2148) } } }, + { { { UINT16_C(3842), UINT16_C(9560), UINT16_C(4451), UINT16_C(3039), + UINT16_C(1396), UINT16_C(7094), UINT16_C(9151), UINT16_C(328) }, + { UINT16_C(3222), UINT16_C(3639), UINT16_C(4981), UINT16_C(4084), + UINT16_C(4213), UINT16_C(6807), UINT16_C(5985), UINT16_C(3640) }, + { UINT16_C(5867), UINT16_C(7377), UINT16_C(7519), UINT16_C(4002), + UINT16_C(4302), UINT16_C(897), UINT16_C(4387), UINT16_C(6837) } }, + { UINT16_C(5729), UINT16_C(6854), UINT16_C(829)}, + { { UINT16_C(3842), UINT16_C(9560), UINT16_C(5729), UINT16_C(3039), + UINT16_C(1396), UINT16_C(7094), UINT16_C(9151), UINT16_C(328) }, + { UINT16_C(3222), UINT16_C(3639), UINT16_C(6854), UINT16_C(4084), + UINT16_C(4213), UINT16_C(6807), UINT16_C(5985), UINT16_C(3640) }, + { UINT16_C(5867), UINT16_C(7377), UINT16_C(829), 
UINT16_C(4002), + UINT16_C(4302), UINT16_C(897), UINT16_C(4387), UINT16_C(6837) } } }, + { { { UINT16_C(7563), UINT16_C(2164), UINT16_C(8586), UINT16_C(1245), + UINT16_C(9300), UINT16_C(1489), UINT16_C(7158), UINT16_C(4863) }, + { UINT16_C(1424), UINT16_C(4247), UINT16_C(5640), UINT16_C(6608), + UINT16_C(5989), UINT16_C(3578), UINT16_C(742), UINT16_C(1885) }, + { UINT16_C(5512), UINT16_C(3861), UINT16_C(6631), UINT16_C(2430), + UINT16_C(2806), UINT16_C(1789), UINT16_C(8536), UINT16_C(1442) } }, + { UINT16_C(321), UINT16_C(9802), UINT16_C(8592)}, + { { UINT16_C(7563), UINT16_C(2164), UINT16_C(8586), UINT16_C(321), + UINT16_C(9300), UINT16_C(1489), UINT16_C(7158), UINT16_C(4863) }, + { UINT16_C(1424), UINT16_C(4247), UINT16_C(5640), UINT16_C(9802), + UINT16_C(5989), UINT16_C(3578), UINT16_C(742), UINT16_C(1885) }, + { UINT16_C(5512), UINT16_C(3861), UINT16_C(6631), UINT16_C(8592), + UINT16_C(2806), UINT16_C(1789), UINT16_C(8536), UINT16_C(1442) } } }, + { { { UINT16_C(846), UINT16_C(2844), UINT16_C(4543), UINT16_C(5042), + UINT16_C(9059), UINT16_C(789), UINT16_C(9345), UINT16_C(8528) }, + { UINT16_C(5245), UINT16_C(4972), UINT16_C(9392), UINT16_C(6785), + UINT16_C(6982), UINT16_C(1587), UINT16_C(9547), UINT16_C(806) }, + { UINT16_C(2535), UINT16_C(912), UINT16_C(7134), UINT16_C(1939), + UINT16_C(226), UINT16_C(4171), UINT16_C(756), UINT16_C(7958) } }, + { UINT16_C(9975), UINT16_C(6402), UINT16_C(5394)}, + { { UINT16_C(846), UINT16_C(2844), UINT16_C(4543), UINT16_C(5042), + UINT16_C(9975), UINT16_C(789), UINT16_C(9345), UINT16_C(8528) }, + { UINT16_C(5245), UINT16_C(4972), UINT16_C(9392), UINT16_C(6785), + UINT16_C(6402), UINT16_C(1587), UINT16_C(9547), UINT16_C(806) }, + { UINT16_C(2535), UINT16_C(912), UINT16_C(7134), UINT16_C(1939), + UINT16_C(5394), UINT16_C(4171), UINT16_C(756), UINT16_C(7958) } } }, + { { { UINT16_C(5491), UINT16_C(9037), UINT16_C(2121), UINT16_C(3027), + UINT16_C(9896), UINT16_C(3828), UINT16_C(4925), UINT16_C(6295) }, + { UINT16_C(4330), 
UINT16_C(4964), UINT16_C(3055), UINT16_C(2853), + UINT16_C(5505), UINT16_C(2630), UINT16_C(7217), UINT16_C(9453) }, + { UINT16_C(9293), UINT16_C(8272), UINT16_C(4351), UINT16_C(7481), + UINT16_C(2380), UINT16_C(1671), UINT16_C(9975), UINT16_C(6647) } }, + { UINT16_C(2220), UINT16_C(9744), UINT16_C(8641)}, + { { UINT16_C(5491), UINT16_C(9037), UINT16_C(2121), UINT16_C(3027), + UINT16_C(9896), UINT16_C(2220), UINT16_C(4925), UINT16_C(6295) }, + { UINT16_C(4330), UINT16_C(4964), UINT16_C(3055), UINT16_C(2853), + UINT16_C(5505), UINT16_C(9744), UINT16_C(7217), UINT16_C(9453) }, + { UINT16_C(9293), UINT16_C(8272), UINT16_C(4351), UINT16_C(7481), + UINT16_C(2380), UINT16_C(8641), UINT16_C(9975), UINT16_C(6647) } } }, + { { { UINT16_C(9694), UINT16_C(2965), UINT16_C(2860), UINT16_C(3217), + UINT16_C(905), UINT16_C(1382), UINT16_C(2539), UINT16_C(2136) }, + { UINT16_C(9340), UINT16_C(8057), UINT16_C(2560), UINT16_C(2780), + UINT16_C(9091), UINT16_C(341), UINT16_C(2559), UINT16_C(5566) }, + { UINT16_C(5464), UINT16_C(8934), UINT16_C(8459), UINT16_C(5838), + UINT16_C(5847), UINT16_C(9085), UINT16_C(7113), UINT16_C(1453) } }, + { UINT16_C(9231), UINT16_C(1160), UINT16_C(1750)}, + { { UINT16_C(9694), UINT16_C(2965), UINT16_C(2860), UINT16_C(3217), + UINT16_C(905), UINT16_C(1382), UINT16_C(9231), UINT16_C(2136) }, + { UINT16_C(9340), UINT16_C(8057), UINT16_C(2560), UINT16_C(2780), + UINT16_C(9091), UINT16_C(341), UINT16_C(1160), UINT16_C(5566) }, + { UINT16_C(5464), UINT16_C(8934), UINT16_C(8459), UINT16_C(5838), + UINT16_C(5847), UINT16_C(9085), UINT16_C(1750), UINT16_C(1453) } } }, + { { { UINT16_C(4299), UINT16_C(2575), UINT16_C(1738), UINT16_C(2713), + UINT16_C(3823), UINT16_C(2897), UINT16_C(4868), UINT16_C(3272) }, + { UINT16_C(1451), UINT16_C(4038), UINT16_C(5588), UINT16_C(8951), + UINT16_C(8365), UINT16_C(7112), UINT16_C(7225), UINT16_C(5057) }, + { UINT16_C(1287), UINT16_C(8547), UINT16_C(3369), UINT16_C(2472), + UINT16_C(2063), UINT16_C(1405), UINT16_C(3647), 
UINT16_C(1463) } }, + { UINT16_C(6301), UINT16_C(8922), UINT16_C(8856)}, + { { UINT16_C(4299), UINT16_C(2575), UINT16_C(1738), UINT16_C(2713), + UINT16_C(3823), UINT16_C(2897), UINT16_C(4868), UINT16_C(6301) }, + { UINT16_C(1451), UINT16_C(4038), UINT16_C(5588), UINT16_C(8951), + UINT16_C(8365), UINT16_C(7112), UINT16_C(7225), UINT16_C(8922) }, + { UINT16_C(1287), UINT16_C(8547), UINT16_C(3369), UINT16_C(2472), + UINT16_C(2063), UINT16_C(1405), UINT16_C(3647), UINT16_C(8856) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8x3_t r, src, expected; + src.val[0] = simde_vld1q_u16(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u16(test_vec[i].src[1]); + src.val[2] = simde_vld1q_u16(test_vec[i].src[2]); + + SIMDE_CONSTIFY_8_(simde_vld3q_lane_u16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u16(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u16(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_u16(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u16x8(r.val[2], expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t src[3][4]; + uint32_t buf[3]; + uint32_t r[3][4]; + } test_vec[] = { + { { { UINT32_C(959147), UINT32_C(94730), UINT32_C(265123), UINT32_C(263205) }, + { UINT32_C(327934), UINT32_C(818462), UINT32_C(254631), UINT32_C(86194) }, + { UINT32_C(686652), UINT32_C(235562), UINT32_C(658626), UINT32_C(197485) } }, + { UINT32_C(762356), UINT32_C(737061), UINT32_C(61171)}, + { { UINT32_C(762356), UINT32_C(94730), UINT32_C(265123), UINT32_C(263205) }, + { UINT32_C(737061), UINT32_C(818462), UINT32_C(254631), UINT32_C(86194) }, + { UINT32_C(61171), UINT32_C(235562), UINT32_C(658626), UINT32_C(197485) } } }, + { { { UINT32_C(702078), 
UINT32_C(591841), UINT32_C(292142), UINT32_C(285089) }, + { UINT32_C(587049), UINT32_C(158254), UINT32_C(152477), UINT32_C(801436) }, + { UINT32_C(770450), UINT32_C(89829), UINT32_C(318188), UINT32_C(152245) } }, + { UINT32_C(632310), UINT32_C(338180), UINT32_C(842631)}, + { { UINT32_C(702078), UINT32_C(632310), UINT32_C(292142), UINT32_C(285089) }, + { UINT32_C(587049), UINT32_C(338180), UINT32_C(152477), UINT32_C(801436) }, + { UINT32_C(770450), UINT32_C(842631), UINT32_C(318188), UINT32_C(152245) } } }, + { { { UINT32_C(845723), UINT32_C(174450), UINT32_C(865498), UINT32_C(926110) }, + { UINT32_C(981718), UINT32_C(813344), UINT32_C(735780), UINT32_C(959418) }, + { UINT32_C(814557), UINT32_C(573143), UINT32_C(702063), UINT32_C(80851) } }, + { UINT32_C(353668), UINT32_C(149751), UINT32_C(602863)}, + { { UINT32_C(845723), UINT32_C(174450), UINT32_C(353668), UINT32_C(926110) }, + { UINT32_C(981718), UINT32_C(813344), UINT32_C(149751), UINT32_C(959418) }, + { UINT32_C(814557), UINT32_C(573143), UINT32_C(602863), UINT32_C(80851) } } }, + { { { UINT32_C(306861), UINT32_C(942100), UINT32_C(490421), UINT32_C(582143) }, + { UINT32_C(266515), UINT32_C(182890), UINT32_C(927663), UINT32_C(939410) }, + { UINT32_C(876199), UINT32_C(921600), UINT32_C(507718), UINT32_C(447676) } }, + { UINT32_C(769107), UINT32_C(184326), UINT32_C(759585)}, + { { UINT32_C(306861), UINT32_C(942100), UINT32_C(490421), UINT32_C(769107) }, + { UINT32_C(266515), UINT32_C(182890), UINT32_C(927663), UINT32_C(184326) }, + { UINT32_C(876199), UINT32_C(921600), UINT32_C(507718), UINT32_C(759585) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4x3_t r, src, expected; + src.val[0] = simde_vld1q_u32(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u32(test_vec[i].src[1]); + src.val[2] = simde_vld1q_u32(test_vec[i].src[2]); + + SIMDE_CONSTIFY_4_(simde_vld3q_lane_u32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = 
simde_vld1q_u32(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u32(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_u32(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u32x4(r.val[2], expected.val[2]); + } + return 0; +} + +static int +test_simde_vld3q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t src[3][2]; + uint64_t buf[3]; + uint64_t r[3][2]; + } test_vec[] = { + { { { UINT64_C(56952904), UINT64_C(35256781) }, + { UINT64_C(89718407), UINT64_C(12358219) }, + { UINT64_C(61792069), UINT64_C(55919668) } }, + { UINT64_C(95716616), UINT64_C(80066973), UINT64_C(46886750)}, + { { UINT64_C(95716616), UINT64_C(35256781) }, + { UINT64_C(80066973), UINT64_C(12358219) }, + { UINT64_C(46886750), UINT64_C(55919668) } } }, + { { { UINT64_C(95308677), UINT64_C(34498022) }, + { UINT64_C(67254093), UINT64_C(94380501) }, + { UINT64_C(77927395), UINT64_C(69899674) } }, + { UINT64_C(50242963), UINT64_C(81401259), UINT64_C(34813265)}, + { { UINT64_C(95308677), UINT64_C(50242963) }, + { UINT64_C(67254093), UINT64_C(81401259) }, + { UINT64_C(77927395), UINT64_C(34813265) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2x3_t r, src, expected; + src.val[0] = simde_vld1q_u64(test_vec[i].src[0]); + src.val[1] = simde_vld1q_u64(test_vec[i].src[1]); + src.val[2] = simde_vld1q_u64(test_vec[i].src[2]); + + SIMDE_CONSTIFY_2_(simde_vld3q_lane_u64, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_u64(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_u64(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_u64(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u64x2(r.val[2], 
expected.val[2]); + } + + return 0; +} + +static int +test_simde_vld3q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[3][8]; + simde_float16_t buf[3]; + simde_float16_t r[3][8]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE(25.81), SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(23.73), SIMDE_FLOAT16_VALUE(-45.72), + SIMDE_FLOAT16_VALUE(21.87), SIMDE_FLOAT16_VALUE(8.97), SIMDE_FLOAT16_VALUE(-15.67), SIMDE_FLOAT16_VALUE(26.89) }, + { SIMDE_FLOAT16_VALUE(16.63), SIMDE_FLOAT16_VALUE(6.70), SIMDE_FLOAT16_VALUE(26.16), SIMDE_FLOAT16_VALUE(-0.53), + SIMDE_FLOAT16_VALUE(-39.38), SIMDE_FLOAT16_VALUE(-30.03), SIMDE_FLOAT16_VALUE(33.53), SIMDE_FLOAT16_VALUE(-5.08) }, + { SIMDE_FLOAT16_VALUE(24.18), SIMDE_FLOAT16_VALUE(-0.77), SIMDE_FLOAT16_VALUE(-38.76), SIMDE_FLOAT16_VALUE(23.74), + SIMDE_FLOAT16_VALUE(-37.79), SIMDE_FLOAT16_VALUE(-12.87), SIMDE_FLOAT16_VALUE(35.22), SIMDE_FLOAT16_VALUE(18.82) } }, + { SIMDE_FLOAT16_VALUE(18.75), SIMDE_FLOAT16_VALUE(-19.33), SIMDE_FLOAT16_VALUE(-8.32)}, + { { SIMDE_FLOAT16_VALUE(18.75), SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(23.73), SIMDE_FLOAT16_VALUE(-45.72), + SIMDE_FLOAT16_VALUE(21.87), SIMDE_FLOAT16_VALUE(8.97), SIMDE_FLOAT16_VALUE(-15.67), SIMDE_FLOAT16_VALUE(26.89) }, + { SIMDE_FLOAT16_VALUE(-19.33), SIMDE_FLOAT16_VALUE(6.70), SIMDE_FLOAT16_VALUE(26.16), SIMDE_FLOAT16_VALUE(-0.53), + SIMDE_FLOAT16_VALUE(-39.38), SIMDE_FLOAT16_VALUE(-30.03), SIMDE_FLOAT16_VALUE(33.53), SIMDE_FLOAT16_VALUE(-5.08) }, + { SIMDE_FLOAT16_VALUE(-8.32), SIMDE_FLOAT16_VALUE(-0.77), SIMDE_FLOAT16_VALUE(-38.76), SIMDE_FLOAT16_VALUE(23.74), + SIMDE_FLOAT16_VALUE(-37.79), SIMDE_FLOAT16_VALUE(-12.87), SIMDE_FLOAT16_VALUE(35.22), SIMDE_FLOAT16_VALUE(18.82) } } }, + { { { SIMDE_FLOAT16_VALUE(-21.87), SIMDE_FLOAT16_VALUE(-23.46), SIMDE_FLOAT16_VALUE(43.50), SIMDE_FLOAT16_VALUE(-42.37), + SIMDE_FLOAT16_VALUE(38.96), SIMDE_FLOAT16_VALUE(-39.73), SIMDE_FLOAT16_VALUE(1.99), SIMDE_FLOAT16_VALUE(13.47) }, + { SIMDE_FLOAT16_VALUE(-21.30), 
SIMDE_FLOAT16_VALUE(31.71), SIMDE_FLOAT16_VALUE(43.14), SIMDE_FLOAT16_VALUE(6.05), + SIMDE_FLOAT16_VALUE(-9.36), SIMDE_FLOAT16_VALUE(31.44), SIMDE_FLOAT16_VALUE(-1.40), SIMDE_FLOAT16_VALUE(-46.83) }, + { SIMDE_FLOAT16_VALUE(22.32), SIMDE_FLOAT16_VALUE(-30.37), SIMDE_FLOAT16_VALUE(-16.31), SIMDE_FLOAT16_VALUE(48.67), + SIMDE_FLOAT16_VALUE(-32.26), SIMDE_FLOAT16_VALUE(-43.03), SIMDE_FLOAT16_VALUE(-31.49), SIMDE_FLOAT16_VALUE(-29.73) } }, + { SIMDE_FLOAT16_VALUE(-19.96), SIMDE_FLOAT16_VALUE(16.11), SIMDE_FLOAT16_VALUE(15.29)}, + { { SIMDE_FLOAT16_VALUE(-21.87), SIMDE_FLOAT16_VALUE(-19.96), SIMDE_FLOAT16_VALUE(43.50), SIMDE_FLOAT16_VALUE(-42.37), + SIMDE_FLOAT16_VALUE(38.96), SIMDE_FLOAT16_VALUE(-39.73), SIMDE_FLOAT16_VALUE(1.99), SIMDE_FLOAT16_VALUE(13.47) }, + { SIMDE_FLOAT16_VALUE(-21.30), SIMDE_FLOAT16_VALUE(16.11), SIMDE_FLOAT16_VALUE(43.14), SIMDE_FLOAT16_VALUE(6.05), + SIMDE_FLOAT16_VALUE(-9.36), SIMDE_FLOAT16_VALUE(31.44), SIMDE_FLOAT16_VALUE(-1.40), SIMDE_FLOAT16_VALUE(-46.83) }, + { SIMDE_FLOAT16_VALUE(22.32), SIMDE_FLOAT16_VALUE(15.29), SIMDE_FLOAT16_VALUE(-16.31), SIMDE_FLOAT16_VALUE(48.67), + SIMDE_FLOAT16_VALUE(-32.26), SIMDE_FLOAT16_VALUE(-43.03), SIMDE_FLOAT16_VALUE(-31.49), SIMDE_FLOAT16_VALUE(-29.73) } } }, + { { { SIMDE_FLOAT16_VALUE(22.99), SIMDE_FLOAT16_VALUE(19.29), SIMDE_FLOAT16_VALUE(-46.27), SIMDE_FLOAT16_VALUE(-49.21), + SIMDE_FLOAT16_VALUE(48.55), SIMDE_FLOAT16_VALUE(49.51), SIMDE_FLOAT16_VALUE(1.68), SIMDE_FLOAT16_VALUE(4.07) }, + { SIMDE_FLOAT16_VALUE(-8.20), SIMDE_FLOAT16_VALUE(5.11), SIMDE_FLOAT16_VALUE(17.40), SIMDE_FLOAT16_VALUE(-3.87), + SIMDE_FLOAT16_VALUE(-20.55), SIMDE_FLOAT16_VALUE(19.44), SIMDE_FLOAT16_VALUE(-49.04), SIMDE_FLOAT16_VALUE(-1.42) }, + { SIMDE_FLOAT16_VALUE(22.33), SIMDE_FLOAT16_VALUE(-45.53), SIMDE_FLOAT16_VALUE(-33.31), SIMDE_FLOAT16_VALUE(-20.16), + SIMDE_FLOAT16_VALUE(-39.51), SIMDE_FLOAT16_VALUE(-20.23), SIMDE_FLOAT16_VALUE(-41.96), SIMDE_FLOAT16_VALUE(-27.75) } }, + { SIMDE_FLOAT16_VALUE(21.67), 
SIMDE_FLOAT16_VALUE(-1.62), SIMDE_FLOAT16_VALUE(47.29)}, + { { SIMDE_FLOAT16_VALUE(22.99), SIMDE_FLOAT16_VALUE(19.29), SIMDE_FLOAT16_VALUE(21.67), SIMDE_FLOAT16_VALUE(-49.21), + SIMDE_FLOAT16_VALUE(48.55), SIMDE_FLOAT16_VALUE(49.51), SIMDE_FLOAT16_VALUE(1.68), SIMDE_FLOAT16_VALUE(4.07) }, + { SIMDE_FLOAT16_VALUE(-8.20), SIMDE_FLOAT16_VALUE(5.11), SIMDE_FLOAT16_VALUE(-1.62), SIMDE_FLOAT16_VALUE(-3.87), + SIMDE_FLOAT16_VALUE(-20.55), SIMDE_FLOAT16_VALUE(19.44), SIMDE_FLOAT16_VALUE(-49.04), SIMDE_FLOAT16_VALUE(-1.42) }, + { SIMDE_FLOAT16_VALUE(22.33), SIMDE_FLOAT16_VALUE(-45.53), SIMDE_FLOAT16_VALUE(47.29), SIMDE_FLOAT16_VALUE(-20.16), + SIMDE_FLOAT16_VALUE(-39.51), SIMDE_FLOAT16_VALUE(-20.23), SIMDE_FLOAT16_VALUE(-41.96), SIMDE_FLOAT16_VALUE(-27.75) } } }, + { { { SIMDE_FLOAT16_VALUE(-13.98), SIMDE_FLOAT16_VALUE(-15.40), SIMDE_FLOAT16_VALUE(-41.40), SIMDE_FLOAT16_VALUE(-11.10), + SIMDE_FLOAT16_VALUE(-0.98), SIMDE_FLOAT16_VALUE(20.30), SIMDE_FLOAT16_VALUE(-21.19), SIMDE_FLOAT16_VALUE(32.22) }, + { SIMDE_FLOAT16_VALUE(-29.71), SIMDE_FLOAT16_VALUE(-33.61), SIMDE_FLOAT16_VALUE(-34.96), SIMDE_FLOAT16_VALUE(22.05), + SIMDE_FLOAT16_VALUE(-46.85), SIMDE_FLOAT16_VALUE(49.18), SIMDE_FLOAT16_VALUE(29.80), SIMDE_FLOAT16_VALUE(15.12) }, + { SIMDE_FLOAT16_VALUE(-48.25), SIMDE_FLOAT16_VALUE(20.21), SIMDE_FLOAT16_VALUE(30.89), SIMDE_FLOAT16_VALUE(-8.00), + SIMDE_FLOAT16_VALUE(47.59), SIMDE_FLOAT16_VALUE(-33.30), SIMDE_FLOAT16_VALUE(-17.17), SIMDE_FLOAT16_VALUE(15.96) } }, + { SIMDE_FLOAT16_VALUE(-22.71), SIMDE_FLOAT16_VALUE(18.03), SIMDE_FLOAT16_VALUE(-12.62)}, + { { SIMDE_FLOAT16_VALUE(-13.98), SIMDE_FLOAT16_VALUE(-15.40), SIMDE_FLOAT16_VALUE(-41.40), SIMDE_FLOAT16_VALUE(-22.71), + SIMDE_FLOAT16_VALUE(-0.98), SIMDE_FLOAT16_VALUE(20.30), SIMDE_FLOAT16_VALUE(-21.19), SIMDE_FLOAT16_VALUE(32.22) }, + { SIMDE_FLOAT16_VALUE(-29.71), SIMDE_FLOAT16_VALUE(-33.61), SIMDE_FLOAT16_VALUE(-34.96), SIMDE_FLOAT16_VALUE(18.03), + SIMDE_FLOAT16_VALUE(-46.85), SIMDE_FLOAT16_VALUE(49.18), 
SIMDE_FLOAT16_VALUE(29.80), SIMDE_FLOAT16_VALUE(15.12) }, + { SIMDE_FLOAT16_VALUE(-48.25), SIMDE_FLOAT16_VALUE(20.21), SIMDE_FLOAT16_VALUE(30.89), SIMDE_FLOAT16_VALUE(-12.62), + SIMDE_FLOAT16_VALUE(47.59), SIMDE_FLOAT16_VALUE(-33.30), SIMDE_FLOAT16_VALUE(-17.17), SIMDE_FLOAT16_VALUE(15.96) } } }, + { { { SIMDE_FLOAT16_VALUE(-33.16), SIMDE_FLOAT16_VALUE(41.77), SIMDE_FLOAT16_VALUE(0.37), SIMDE_FLOAT16_VALUE(49.97), + SIMDE_FLOAT16_VALUE(41.81), SIMDE_FLOAT16_VALUE(-38.26), SIMDE_FLOAT16_VALUE(36.03), SIMDE_FLOAT16_VALUE(-6.12) }, + { SIMDE_FLOAT16_VALUE(45.98), SIMDE_FLOAT16_VALUE(33.57), SIMDE_FLOAT16_VALUE(4.26), SIMDE_FLOAT16_VALUE(19.87), + SIMDE_FLOAT16_VALUE(6.49), SIMDE_FLOAT16_VALUE(3.23), SIMDE_FLOAT16_VALUE(17.28), SIMDE_FLOAT16_VALUE(-0.70) }, + { SIMDE_FLOAT16_VALUE(-45.21), SIMDE_FLOAT16_VALUE(10.70), SIMDE_FLOAT16_VALUE(-30.89), SIMDE_FLOAT16_VALUE(17.48), + SIMDE_FLOAT16_VALUE(-18.12), SIMDE_FLOAT16_VALUE(-36.32), SIMDE_FLOAT16_VALUE(12.70), SIMDE_FLOAT16_VALUE(-9.89) } }, + { SIMDE_FLOAT16_VALUE(19.50), SIMDE_FLOAT16_VALUE(43.23), SIMDE_FLOAT16_VALUE(-34.14)}, + { { SIMDE_FLOAT16_VALUE(-33.16), SIMDE_FLOAT16_VALUE(41.77), SIMDE_FLOAT16_VALUE(0.37), SIMDE_FLOAT16_VALUE(49.97), + SIMDE_FLOAT16_VALUE(19.50), SIMDE_FLOAT16_VALUE(-38.26), SIMDE_FLOAT16_VALUE(36.03), SIMDE_FLOAT16_VALUE(-6.12) }, + { SIMDE_FLOAT16_VALUE(45.98), SIMDE_FLOAT16_VALUE(33.57), SIMDE_FLOAT16_VALUE(4.26), SIMDE_FLOAT16_VALUE(19.87), + SIMDE_FLOAT16_VALUE(43.23), SIMDE_FLOAT16_VALUE(3.23), SIMDE_FLOAT16_VALUE(17.28), SIMDE_FLOAT16_VALUE(-0.70) }, + { SIMDE_FLOAT16_VALUE(-45.21), SIMDE_FLOAT16_VALUE(10.70), SIMDE_FLOAT16_VALUE(-30.89), SIMDE_FLOAT16_VALUE(17.48), + SIMDE_FLOAT16_VALUE(-34.14), SIMDE_FLOAT16_VALUE(-36.32), SIMDE_FLOAT16_VALUE(12.70), SIMDE_FLOAT16_VALUE(-9.89) } } }, + { { { SIMDE_FLOAT16_VALUE(34.35), SIMDE_FLOAT16_VALUE(45.11), SIMDE_FLOAT16_VALUE(-17.38), SIMDE_FLOAT16_VALUE(32.13), + SIMDE_FLOAT16_VALUE(-19.58), SIMDE_FLOAT16_VALUE(-5.73), 
SIMDE_FLOAT16_VALUE(-14.12), SIMDE_FLOAT16_VALUE(-0.42) }, + { SIMDE_FLOAT16_VALUE(-17.04), SIMDE_FLOAT16_VALUE(40.80), SIMDE_FLOAT16_VALUE(-45.80), SIMDE_FLOAT16_VALUE(37.28), + SIMDE_FLOAT16_VALUE(35.24), SIMDE_FLOAT16_VALUE(40.83), SIMDE_FLOAT16_VALUE(41.86), SIMDE_FLOAT16_VALUE(16.76) }, + { SIMDE_FLOAT16_VALUE(44.82), SIMDE_FLOAT16_VALUE(44.21), SIMDE_FLOAT16_VALUE(-16.87), SIMDE_FLOAT16_VALUE(28.04), + SIMDE_FLOAT16_VALUE(12.42), SIMDE_FLOAT16_VALUE(-24.77), SIMDE_FLOAT16_VALUE(-2.03), SIMDE_FLOAT16_VALUE(6.16) } }, + { SIMDE_FLOAT16_VALUE(24.51), SIMDE_FLOAT16_VALUE(-14.45), SIMDE_FLOAT16_VALUE(-29.20)}, + { { SIMDE_FLOAT16_VALUE(34.35), SIMDE_FLOAT16_VALUE(45.11), SIMDE_FLOAT16_VALUE(-17.38), SIMDE_FLOAT16_VALUE(32.13), + SIMDE_FLOAT16_VALUE(-19.58), SIMDE_FLOAT16_VALUE(24.51), SIMDE_FLOAT16_VALUE(-14.12), SIMDE_FLOAT16_VALUE(-0.42) }, + { SIMDE_FLOAT16_VALUE(-17.04), SIMDE_FLOAT16_VALUE(40.80), SIMDE_FLOAT16_VALUE(-45.80), SIMDE_FLOAT16_VALUE(37.28), + SIMDE_FLOAT16_VALUE(35.24), SIMDE_FLOAT16_VALUE(-14.45), SIMDE_FLOAT16_VALUE(41.86), SIMDE_FLOAT16_VALUE(16.76) }, + { SIMDE_FLOAT16_VALUE(44.82), SIMDE_FLOAT16_VALUE(44.21), SIMDE_FLOAT16_VALUE(-16.87), SIMDE_FLOAT16_VALUE(28.04), + SIMDE_FLOAT16_VALUE(12.42), SIMDE_FLOAT16_VALUE(-29.20), SIMDE_FLOAT16_VALUE(-2.03), SIMDE_FLOAT16_VALUE(6.16) } } }, + { { { SIMDE_FLOAT16_VALUE(-19.86), SIMDE_FLOAT16_VALUE(-49.12), SIMDE_FLOAT16_VALUE(-43.11), SIMDE_FLOAT16_VALUE(6.61), + SIMDE_FLOAT16_VALUE(-14.42), SIMDE_FLOAT16_VALUE(-26.68), SIMDE_FLOAT16_VALUE(-14.21), SIMDE_FLOAT16_VALUE(-19.62) }, + { SIMDE_FLOAT16_VALUE(2.80), SIMDE_FLOAT16_VALUE(21.66), SIMDE_FLOAT16_VALUE(-36.49), SIMDE_FLOAT16_VALUE(-15.12), + SIMDE_FLOAT16_VALUE(-49.41), SIMDE_FLOAT16_VALUE(-26.19), SIMDE_FLOAT16_VALUE(-24.23), SIMDE_FLOAT16_VALUE(36.09) }, + { SIMDE_FLOAT16_VALUE(10.26), SIMDE_FLOAT16_VALUE(31.20), SIMDE_FLOAT16_VALUE(-42.59), SIMDE_FLOAT16_VALUE(22.36), + SIMDE_FLOAT16_VALUE(-1.95), SIMDE_FLOAT16_VALUE(-8.53), 
SIMDE_FLOAT16_VALUE(37.02), SIMDE_FLOAT16_VALUE(-47.32) } }, + { SIMDE_FLOAT16_VALUE(-2.66), SIMDE_FLOAT16_VALUE(-28.19), SIMDE_FLOAT16_VALUE(-49.39)}, + { { SIMDE_FLOAT16_VALUE(-19.86), SIMDE_FLOAT16_VALUE(-49.12), SIMDE_FLOAT16_VALUE(-43.11), SIMDE_FLOAT16_VALUE(6.61), + SIMDE_FLOAT16_VALUE(-14.42), SIMDE_FLOAT16_VALUE(-26.68), SIMDE_FLOAT16_VALUE(-2.66), SIMDE_FLOAT16_VALUE(-19.62) }, + { SIMDE_FLOAT16_VALUE(2.80), SIMDE_FLOAT16_VALUE(21.66), SIMDE_FLOAT16_VALUE(-36.49), SIMDE_FLOAT16_VALUE(-15.12), + SIMDE_FLOAT16_VALUE(-49.41), SIMDE_FLOAT16_VALUE(-26.19), SIMDE_FLOAT16_VALUE(-28.19), SIMDE_FLOAT16_VALUE(36.09) }, + { SIMDE_FLOAT16_VALUE(10.26), SIMDE_FLOAT16_VALUE(31.20), SIMDE_FLOAT16_VALUE(-42.59), SIMDE_FLOAT16_VALUE(22.36), + SIMDE_FLOAT16_VALUE(-1.95), SIMDE_FLOAT16_VALUE(-8.53), SIMDE_FLOAT16_VALUE(-49.39), SIMDE_FLOAT16_VALUE(-47.32) } } }, + { { { SIMDE_FLOAT16_VALUE(0.30), SIMDE_FLOAT16_VALUE(6.73), SIMDE_FLOAT16_VALUE(-6.28), SIMDE_FLOAT16_VALUE(35.22), + SIMDE_FLOAT16_VALUE(-5.78), SIMDE_FLOAT16_VALUE(-32.42), SIMDE_FLOAT16_VALUE(-25.16), SIMDE_FLOAT16_VALUE(14.50) }, + { SIMDE_FLOAT16_VALUE(9.20), SIMDE_FLOAT16_VALUE(15.26), SIMDE_FLOAT16_VALUE(14.27), SIMDE_FLOAT16_VALUE(24.06), + SIMDE_FLOAT16_VALUE(9.76), SIMDE_FLOAT16_VALUE(-32.84), SIMDE_FLOAT16_VALUE(23.54), SIMDE_FLOAT16_VALUE(-38.60) }, + { SIMDE_FLOAT16_VALUE(-2.89), SIMDE_FLOAT16_VALUE(-12.52), SIMDE_FLOAT16_VALUE(-24.22), SIMDE_FLOAT16_VALUE(45.18), + SIMDE_FLOAT16_VALUE(-49.48), SIMDE_FLOAT16_VALUE(-38.06), SIMDE_FLOAT16_VALUE(0.89), SIMDE_FLOAT16_VALUE(-2.25) } }, + { SIMDE_FLOAT16_VALUE(2.18), SIMDE_FLOAT16_VALUE(35.99), SIMDE_FLOAT16_VALUE(26.18)}, + { { SIMDE_FLOAT16_VALUE(0.30), SIMDE_FLOAT16_VALUE(6.73), SIMDE_FLOAT16_VALUE(-6.28), SIMDE_FLOAT16_VALUE(35.22), + SIMDE_FLOAT16_VALUE(-5.78), SIMDE_FLOAT16_VALUE(-32.42), SIMDE_FLOAT16_VALUE(-25.16), SIMDE_FLOAT16_VALUE(2.18) }, + { SIMDE_FLOAT16_VALUE(9.20), SIMDE_FLOAT16_VALUE(15.26), SIMDE_FLOAT16_VALUE(14.27), 
SIMDE_FLOAT16_VALUE(24.06), + SIMDE_FLOAT16_VALUE(9.76), SIMDE_FLOAT16_VALUE(-32.84), SIMDE_FLOAT16_VALUE(23.54), SIMDE_FLOAT16_VALUE(35.99) }, + { SIMDE_FLOAT16_VALUE(-2.89), SIMDE_FLOAT16_VALUE(-12.52), SIMDE_FLOAT16_VALUE(-24.22), SIMDE_FLOAT16_VALUE(45.18), + SIMDE_FLOAT16_VALUE(-49.48), SIMDE_FLOAT16_VALUE(-38.06), SIMDE_FLOAT16_VALUE(0.89), SIMDE_FLOAT16_VALUE(26.18) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x3_t r, src, expected; + src.val[0] = simde_vld1q_f16(test_vec[i].src[0]); + src.val[1] = simde_vld1q_f16(test_vec[i].src[1]); + src.val[2] = simde_vld1q_f16(test_vec[i].src[2]); + + SIMDE_CONSTIFY_8_(simde_vld3q_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_f16(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_f16(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_f16(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[2], expected.val[2], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld3q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t src[3][4]; + simde_float32_t buf[3]; + simde_float32_t r[3][4]; + } test_vec[] = { + { { { SIMDE_FLOAT32_C(1442.30), SIMDE_FLOAT32_C(446.63), SIMDE_FLOAT32_C(2243.75), SIMDE_FLOAT32_C(-2499.62) }, + { SIMDE_FLOAT32_C(-1210.23), SIMDE_FLOAT32_C(1076.44), SIMDE_FLOAT32_C(304.40), SIMDE_FLOAT32_C(4898.02) }, + { SIMDE_FLOAT32_C(-4476.84), SIMDE_FLOAT32_C(808.69), SIMDE_FLOAT32_C(-1468.41), SIMDE_FLOAT32_C(4180.77) } }, + { SIMDE_FLOAT32_C(1068.72), SIMDE_FLOAT32_C(-3535.90), SIMDE_FLOAT32_C(-2034.07)}, + { { SIMDE_FLOAT32_C(1068.72), SIMDE_FLOAT32_C(446.63), SIMDE_FLOAT32_C(2243.75), SIMDE_FLOAT32_C(-2499.62) }, + { SIMDE_FLOAT32_C(-3535.90), SIMDE_FLOAT32_C(1076.44), SIMDE_FLOAT32_C(304.40), 
SIMDE_FLOAT32_C(4898.02) }, + { SIMDE_FLOAT32_C(-2034.07), SIMDE_FLOAT32_C(808.69), SIMDE_FLOAT32_C(-1468.41), SIMDE_FLOAT32_C(4180.77) } } }, + { { { SIMDE_FLOAT32_C(2644.49), SIMDE_FLOAT32_C(4971.83), SIMDE_FLOAT32_C(861.80), SIMDE_FLOAT32_C(3165.61) }, + { SIMDE_FLOAT32_C(981.55), SIMDE_FLOAT32_C(4193.07), SIMDE_FLOAT32_C(4921.81), SIMDE_FLOAT32_C(-254.58) }, + { SIMDE_FLOAT32_C(-1820.81), SIMDE_FLOAT32_C(-2716.98), SIMDE_FLOAT32_C(-4407.29), SIMDE_FLOAT32_C(3037.96) } }, + { SIMDE_FLOAT32_C(1589.12), SIMDE_FLOAT32_C(-3745.39), SIMDE_FLOAT32_C(-1432.03)}, + { { SIMDE_FLOAT32_C(2644.49), SIMDE_FLOAT32_C(1589.12), SIMDE_FLOAT32_C(861.80), SIMDE_FLOAT32_C(3165.61) }, + { SIMDE_FLOAT32_C(981.55), SIMDE_FLOAT32_C(-3745.39), SIMDE_FLOAT32_C(4921.81), SIMDE_FLOAT32_C(-254.58) }, + { SIMDE_FLOAT32_C(-1820.81), SIMDE_FLOAT32_C(-1432.03), SIMDE_FLOAT32_C(-4407.29), SIMDE_FLOAT32_C(3037.96) } } }, + { { { SIMDE_FLOAT32_C(103.78), SIMDE_FLOAT32_C(4622.34), SIMDE_FLOAT32_C(2752.48), SIMDE_FLOAT32_C(-1587.27) }, + { SIMDE_FLOAT32_C(-3510.64), SIMDE_FLOAT32_C(-3612.38), SIMDE_FLOAT32_C(-3439.33), SIMDE_FLOAT32_C(3770.62) }, + { SIMDE_FLOAT32_C(-2761.21), SIMDE_FLOAT32_C(4309.89), SIMDE_FLOAT32_C(-1652.27), SIMDE_FLOAT32_C(-3021.97) } }, + { SIMDE_FLOAT32_C(3900.94), SIMDE_FLOAT32_C(3824.24), SIMDE_FLOAT32_C(4471.20)}, + { { SIMDE_FLOAT32_C(103.78), SIMDE_FLOAT32_C(4622.34), SIMDE_FLOAT32_C(3900.94), SIMDE_FLOAT32_C(-1587.27) }, + { SIMDE_FLOAT32_C(-3510.64), SIMDE_FLOAT32_C(-3612.38), SIMDE_FLOAT32_C(3824.24), SIMDE_FLOAT32_C(3770.62) }, + { SIMDE_FLOAT32_C(-2761.21), SIMDE_FLOAT32_C(4309.89), SIMDE_FLOAT32_C(4471.20), SIMDE_FLOAT32_C(-3021.97) } } }, + { { { SIMDE_FLOAT32_C(4335.85), SIMDE_FLOAT32_C(-509.93), SIMDE_FLOAT32_C(3599.27), SIMDE_FLOAT32_C(3192.52) }, + { SIMDE_FLOAT32_C(3953.94), SIMDE_FLOAT32_C(-3940.66), SIMDE_FLOAT32_C(-0.37), SIMDE_FLOAT32_C(-529.08) }, + { SIMDE_FLOAT32_C(3260.04), SIMDE_FLOAT32_C(1545.62), SIMDE_FLOAT32_C(-1906.87), SIMDE_FLOAT32_C(3771.80) 
} }, + { SIMDE_FLOAT32_C(2065.71), SIMDE_FLOAT32_C(1441.87), SIMDE_FLOAT32_C(-3831.46)}, + { { SIMDE_FLOAT32_C(4335.85), SIMDE_FLOAT32_C(-509.93), SIMDE_FLOAT32_C(3599.27), SIMDE_FLOAT32_C(2065.71) }, + { SIMDE_FLOAT32_C(3953.94), SIMDE_FLOAT32_C(-3940.66), SIMDE_FLOAT32_C(-0.37), SIMDE_FLOAT32_C(1441.87) }, + { SIMDE_FLOAT32_C(3260.04), SIMDE_FLOAT32_C(1545.62), SIMDE_FLOAT32_C(-1906.87), SIMDE_FLOAT32_C(-3831.46) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4x3_t r, src, expected; + src.val[0] = simde_vld1q_f32(test_vec[i].src[0]); + src.val[1] = simde_vld1q_f32(test_vec[i].src[1]); + src.val[2] = simde_vld1q_f32(test_vec[i].src[2]); + + SIMDE_CONSTIFY_4_(simde_vld3q_lane_f32, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_f32(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_f32(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_f32(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[2], expected.val[2], INT_MAX); + } + + return 0; + +} + +static int +test_simde_vld3q_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t src[3][2]; + simde_float64_t buf[3]; + simde_float64_t r[3][2]; + } test_vec[] = { + { { { SIMDE_FLOAT64_C(332972.58), SIMDE_FLOAT64_C(-165510.51) }, + { SIMDE_FLOAT64_C(473205.46), SIMDE_FLOAT64_C(-167720.34) }, + { SIMDE_FLOAT64_C(203052.58), SIMDE_FLOAT64_C(-475421.32) } }, + { SIMDE_FLOAT64_C(-417188.68), SIMDE_FLOAT64_C(-109291.82), SIMDE_FLOAT64_C(-140556.76)}, + { { SIMDE_FLOAT64_C(-417188.68), SIMDE_FLOAT64_C(-165510.51) }, + { SIMDE_FLOAT64_C(-109291.82), SIMDE_FLOAT64_C(-167720.34) }, + { SIMDE_FLOAT64_C(-140556.76), SIMDE_FLOAT64_C(-475421.32) } } }, + { { { SIMDE_FLOAT64_C(-480339.81), SIMDE_FLOAT64_C(-50482.68) }, + { 
SIMDE_FLOAT64_C(214690.23), SIMDE_FLOAT64_C(242650.32) }, + { SIMDE_FLOAT64_C(-234163.45), SIMDE_FLOAT64_C(-301439.99) } }, + { SIMDE_FLOAT64_C(-402905.00), SIMDE_FLOAT64_C(-486035.94), SIMDE_FLOAT64_C(491188.01)}, + { { SIMDE_FLOAT64_C(-480339.81), SIMDE_FLOAT64_C(-402905.00) }, + { SIMDE_FLOAT64_C(214690.23), SIMDE_FLOAT64_C(-486035.94) }, + { SIMDE_FLOAT64_C(-234163.45), SIMDE_FLOAT64_C(491188.01) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2x3_t r, src, expected; + src.val[0] = simde_vld1q_f64(test_vec[i].src[0]); + src.val[1] = simde_vld1q_f64(test_vec[i].src[1]); + src.val[2] = simde_vld1q_f64(test_vec[i].src[2]); + + SIMDE_CONSTIFY_2_(simde_vld3q_lane_f64, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + expected.val[0] = simde_vld1q_f64(test_vec[i].r[0]); + expected.val[1] = simde_vld1q_f64(test_vec[i].r[1]); + expected.val[2] = simde_vld1q_f64(test_vec[i].r[2]); + + simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[2], expected.val[2], INT_MAX); + } + + return 0; + +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3_lane_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_u16) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_lane_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld4.c b/test/arm/neon/ld4.c new file mode 100644 index 000000000..b96995627 --- /dev/null +++ b/test/arm/neon/ld4.c @@ -0,0 +1,88 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld4 + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld4.h" + +#if !defined(SIMDE_BUG_INTEL_857088) + + +static int +test_simde_vld4_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[16]; + simde_float16_t r[4][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-42.44), SIMDE_FLOAT16_VALUE(16.36), SIMDE_FLOAT16_VALUE(2.71), SIMDE_FLOAT16_VALUE(-2.23), SIMDE_FLOAT16_VALUE(6.68), SIMDE_FLOAT16_VALUE(13.18), SIMDE_FLOAT16_VALUE(-5.39), SIMDE_FLOAT16_VALUE(14.99), + SIMDE_FLOAT16_VALUE(29.61), SIMDE_FLOAT16_VALUE(5.95), SIMDE_FLOAT16_VALUE(-9.74), SIMDE_FLOAT16_VALUE(-47.17), SIMDE_FLOAT16_VALUE(26.25), SIMDE_FLOAT16_VALUE(48.73), SIMDE_FLOAT16_VALUE(31.37), SIMDE_FLOAT16_VALUE(-28.10) }, + { { SIMDE_FLOAT16_VALUE(-42.44), SIMDE_FLOAT16_VALUE(6.68), SIMDE_FLOAT16_VALUE(29.61), SIMDE_FLOAT16_VALUE(26.25) }, + { SIMDE_FLOAT16_VALUE(16.36), SIMDE_FLOAT16_VALUE(13.18), SIMDE_FLOAT16_VALUE(5.95), SIMDE_FLOAT16_VALUE(48.73) }, + { SIMDE_FLOAT16_VALUE(2.71), SIMDE_FLOAT16_VALUE(-5.39), SIMDE_FLOAT16_VALUE(-9.74), SIMDE_FLOAT16_VALUE(31.37) }, + { SIMDE_FLOAT16_VALUE(-2.23), SIMDE_FLOAT16_VALUE(14.99), SIMDE_FLOAT16_VALUE(-47.17), SIMDE_FLOAT16_VALUE(-28.10) } } }, + { { SIMDE_FLOAT16_VALUE(46.80), SIMDE_FLOAT16_VALUE(-2.58), SIMDE_FLOAT16_VALUE(-4.79), SIMDE_FLOAT16_VALUE(-40.09), SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(29.02), SIMDE_FLOAT16_VALUE(-2.54), SIMDE_FLOAT16_VALUE(34.40), + SIMDE_FLOAT16_VALUE(33.54), SIMDE_FLOAT16_VALUE(-47.97), SIMDE_FLOAT16_VALUE(0.26), 
SIMDE_FLOAT16_VALUE(-5.30), SIMDE_FLOAT16_VALUE(-28.10), SIMDE_FLOAT16_VALUE(-26.53), SIMDE_FLOAT16_VALUE(-36.91), SIMDE_FLOAT16_VALUE(-35.35) }, + { { SIMDE_FLOAT16_VALUE(46.80), SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(33.54), SIMDE_FLOAT16_VALUE(-28.10) }, + { SIMDE_FLOAT16_VALUE(-2.58), SIMDE_FLOAT16_VALUE(29.02), SIMDE_FLOAT16_VALUE(-47.97), SIMDE_FLOAT16_VALUE(-26.53) }, + { SIMDE_FLOAT16_VALUE(-4.79), SIMDE_FLOAT16_VALUE(-2.54), SIMDE_FLOAT16_VALUE(0.26), SIMDE_FLOAT16_VALUE(-36.91) }, + { SIMDE_FLOAT16_VALUE(-40.09), SIMDE_FLOAT16_VALUE(34.40), SIMDE_FLOAT16_VALUE(-5.30), SIMDE_FLOAT16_VALUE(-35.35) } } }, + { { SIMDE_FLOAT16_VALUE(-19.68), SIMDE_FLOAT16_VALUE(38.44), SIMDE_FLOAT16_VALUE(-20.27), SIMDE_FLOAT16_VALUE(40.73), SIMDE_FLOAT16_VALUE(35.19), SIMDE_FLOAT16_VALUE(-43.88), SIMDE_FLOAT16_VALUE(21.72), SIMDE_FLOAT16_VALUE(-34.52), + SIMDE_FLOAT16_VALUE(7.13), SIMDE_FLOAT16_VALUE(38.46), SIMDE_FLOAT16_VALUE(-3.30), SIMDE_FLOAT16_VALUE(-19.44), SIMDE_FLOAT16_VALUE(48.82), SIMDE_FLOAT16_VALUE(-43.82), SIMDE_FLOAT16_VALUE(46.77), SIMDE_FLOAT16_VALUE(39.55) }, + { { SIMDE_FLOAT16_VALUE(-19.68), SIMDE_FLOAT16_VALUE(35.19), SIMDE_FLOAT16_VALUE(7.13), SIMDE_FLOAT16_VALUE(48.82) }, + { SIMDE_FLOAT16_VALUE(38.44), SIMDE_FLOAT16_VALUE(-43.88), SIMDE_FLOAT16_VALUE(38.46), SIMDE_FLOAT16_VALUE(-43.82) }, + { SIMDE_FLOAT16_VALUE(-20.27), SIMDE_FLOAT16_VALUE(21.72), SIMDE_FLOAT16_VALUE(-3.30), SIMDE_FLOAT16_VALUE(46.77) }, + { SIMDE_FLOAT16_VALUE(40.73), SIMDE_FLOAT16_VALUE(-34.52), SIMDE_FLOAT16_VALUE(-19.44), SIMDE_FLOAT16_VALUE(39.55) } } }, + { { SIMDE_FLOAT16_VALUE(28.68), SIMDE_FLOAT16_VALUE(14.00), SIMDE_FLOAT16_VALUE(15.71), SIMDE_FLOAT16_VALUE(-4.78), SIMDE_FLOAT16_VALUE(-11.15), SIMDE_FLOAT16_VALUE(-35.31), SIMDE_FLOAT16_VALUE(-28.54), SIMDE_FLOAT16_VALUE(-9.91), + SIMDE_FLOAT16_VALUE(-19.18), SIMDE_FLOAT16_VALUE(-16.31), SIMDE_FLOAT16_VALUE(26.64), SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(46.00), SIMDE_FLOAT16_VALUE(29.32), 
SIMDE_FLOAT16_VALUE(49.37), SIMDE_FLOAT16_VALUE(49.85) }, + { { SIMDE_FLOAT16_VALUE(28.68), SIMDE_FLOAT16_VALUE(-11.15), SIMDE_FLOAT16_VALUE(-19.18), SIMDE_FLOAT16_VALUE(46.00) }, + { SIMDE_FLOAT16_VALUE(14.00), SIMDE_FLOAT16_VALUE(-35.31), SIMDE_FLOAT16_VALUE(-16.31), SIMDE_FLOAT16_VALUE(29.32) }, + { SIMDE_FLOAT16_VALUE(15.71), SIMDE_FLOAT16_VALUE(-28.54), SIMDE_FLOAT16_VALUE(26.64), SIMDE_FLOAT16_VALUE(49.37) }, + { SIMDE_FLOAT16_VALUE(-4.78), SIMDE_FLOAT16_VALUE(-9.91), SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(49.85) } } }, + { { SIMDE_FLOAT16_VALUE(45.93), SIMDE_FLOAT16_VALUE(-28.16), SIMDE_FLOAT16_VALUE(-45.54), SIMDE_FLOAT16_VALUE(-39.77), SIMDE_FLOAT16_VALUE(5.90), SIMDE_FLOAT16_VALUE(13.00), SIMDE_FLOAT16_VALUE(39.78), SIMDE_FLOAT16_VALUE(5.72), + SIMDE_FLOAT16_VALUE(-49.14), SIMDE_FLOAT16_VALUE(-9.08), SIMDE_FLOAT16_VALUE(-42.40), SIMDE_FLOAT16_VALUE(33.41), SIMDE_FLOAT16_VALUE(46.84), SIMDE_FLOAT16_VALUE(13.63), SIMDE_FLOAT16_VALUE(-1.26), SIMDE_FLOAT16_VALUE(-22.13) }, + { { SIMDE_FLOAT16_VALUE(45.93), SIMDE_FLOAT16_VALUE(5.90), SIMDE_FLOAT16_VALUE(-49.14), SIMDE_FLOAT16_VALUE(46.84) }, + { SIMDE_FLOAT16_VALUE(-28.16), SIMDE_FLOAT16_VALUE(13.00), SIMDE_FLOAT16_VALUE(-9.08), SIMDE_FLOAT16_VALUE(13.63) }, + { SIMDE_FLOAT16_VALUE(-45.54), SIMDE_FLOAT16_VALUE(39.78), SIMDE_FLOAT16_VALUE(-42.40), SIMDE_FLOAT16_VALUE(-1.26) }, + { SIMDE_FLOAT16_VALUE(-39.77), SIMDE_FLOAT16_VALUE(5.72), SIMDE_FLOAT16_VALUE(33.41), SIMDE_FLOAT16_VALUE(-22.13) } } }, + { { SIMDE_FLOAT16_VALUE(28.54), SIMDE_FLOAT16_VALUE(-6.45), SIMDE_FLOAT16_VALUE(2.44), SIMDE_FLOAT16_VALUE(-10.11), SIMDE_FLOAT16_VALUE(-13.99), SIMDE_FLOAT16_VALUE(-2.36), SIMDE_FLOAT16_VALUE(49.31), SIMDE_FLOAT16_VALUE(35.77), + SIMDE_FLOAT16_VALUE(-40.86), SIMDE_FLOAT16_VALUE(35.64), SIMDE_FLOAT16_VALUE(3.45), SIMDE_FLOAT16_VALUE(42.23), SIMDE_FLOAT16_VALUE(-23.61), SIMDE_FLOAT16_VALUE(-8.50), SIMDE_FLOAT16_VALUE(16.26), SIMDE_FLOAT16_VALUE(-39.89) }, + { { SIMDE_FLOAT16_VALUE(28.54), 
SIMDE_FLOAT16_VALUE(-13.99), SIMDE_FLOAT16_VALUE(-40.86), SIMDE_FLOAT16_VALUE(-23.61) }, + { SIMDE_FLOAT16_VALUE(-6.45), SIMDE_FLOAT16_VALUE(-2.36), SIMDE_FLOAT16_VALUE(35.64), SIMDE_FLOAT16_VALUE(-8.50) }, + { SIMDE_FLOAT16_VALUE(2.44), SIMDE_FLOAT16_VALUE(49.31), SIMDE_FLOAT16_VALUE(3.45), SIMDE_FLOAT16_VALUE(16.26) }, + { SIMDE_FLOAT16_VALUE(-10.11), SIMDE_FLOAT16_VALUE(35.77), SIMDE_FLOAT16_VALUE(42.23), SIMDE_FLOAT16_VALUE(-39.89) } } }, + { { SIMDE_FLOAT16_VALUE(28.38), SIMDE_FLOAT16_VALUE(-38.32), SIMDE_FLOAT16_VALUE(-20.75), SIMDE_FLOAT16_VALUE(-37.97), SIMDE_FLOAT16_VALUE(41.86), SIMDE_FLOAT16_VALUE(-20.13), SIMDE_FLOAT16_VALUE(20.27), SIMDE_FLOAT16_VALUE(12.36), + SIMDE_FLOAT16_VALUE(13.16), SIMDE_FLOAT16_VALUE(39.48), SIMDE_FLOAT16_VALUE(27.15), SIMDE_FLOAT16_VALUE(3.67), SIMDE_FLOAT16_VALUE(-33.92), SIMDE_FLOAT16_VALUE(36.94), SIMDE_FLOAT16_VALUE(-27.60), SIMDE_FLOAT16_VALUE(-10.49) }, + { { SIMDE_FLOAT16_VALUE(28.38), SIMDE_FLOAT16_VALUE(41.86), SIMDE_FLOAT16_VALUE(13.16), SIMDE_FLOAT16_VALUE(-33.92) }, + { SIMDE_FLOAT16_VALUE(-38.32), SIMDE_FLOAT16_VALUE(-20.13), SIMDE_FLOAT16_VALUE(39.48), SIMDE_FLOAT16_VALUE(36.94) }, + { SIMDE_FLOAT16_VALUE(-20.75), SIMDE_FLOAT16_VALUE(20.27), SIMDE_FLOAT16_VALUE(27.15), SIMDE_FLOAT16_VALUE(-27.60) }, + { SIMDE_FLOAT16_VALUE(-37.97), SIMDE_FLOAT16_VALUE(12.36), SIMDE_FLOAT16_VALUE(3.67), SIMDE_FLOAT16_VALUE(-10.49) } } }, + { { SIMDE_FLOAT16_VALUE(5.31), SIMDE_FLOAT16_VALUE(-31.56), SIMDE_FLOAT16_VALUE(17.63), SIMDE_FLOAT16_VALUE(-32.75), SIMDE_FLOAT16_VALUE(-22.50), SIMDE_FLOAT16_VALUE(11.55), SIMDE_FLOAT16_VALUE(21.93), SIMDE_FLOAT16_VALUE(-23.80), + SIMDE_FLOAT16_VALUE(19.27), SIMDE_FLOAT16_VALUE(0.51), SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(6.68), SIMDE_FLOAT16_VALUE(48.85), SIMDE_FLOAT16_VALUE(-46.60), SIMDE_FLOAT16_VALUE(-40.92), SIMDE_FLOAT16_VALUE(-9.27) }, + { { SIMDE_FLOAT16_VALUE(5.31), SIMDE_FLOAT16_VALUE(-22.50), SIMDE_FLOAT16_VALUE(19.27), SIMDE_FLOAT16_VALUE(48.85) }, + { 
SIMDE_FLOAT16_VALUE(-31.56), SIMDE_FLOAT16_VALUE(11.55), SIMDE_FLOAT16_VALUE(0.51), SIMDE_FLOAT16_VALUE(-46.60) }, + { SIMDE_FLOAT16_VALUE(17.63), SIMDE_FLOAT16_VALUE(21.93), SIMDE_FLOAT16_VALUE(15.52), SIMDE_FLOAT16_VALUE(-40.92) }, + { SIMDE_FLOAT16_VALUE(-32.75), SIMDE_FLOAT16_VALUE(-23.80), SIMDE_FLOAT16_VALUE(6.68), SIMDE_FLOAT16_VALUE(-9.27) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x4_t r = simde_vld4_f16(test_vec[i].a); + + simde_float16x4x4_t expected = { + {simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1]), + simde_vld1_f16(test_vec[i].r[2]), simde_vld1_f16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_BEGIN +#if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_f16) +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld4_dup.c b/test/arm/neon/ld4_dup.c new file mode 100644 index 000000000..44009354f --- /dev/null +++ b/test/arm/neon/ld4_dup.c @@ -0,0 +1,1846 @@ +#define SIMDE_TEST_ARM_NEON_INSN ld4_dup + +#include "test-neon.h" +#include "../../../simde/arm/neon/ld4_dup.h" + +#if !defined(SIMDE_BUG_INTEL_857088) + +static int +test_simde_vld4_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[4]; + simde_float16_t unused[4]; + simde_float16_t r[4][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(48.08), SIMDE_FLOAT16_VALUE(24.50), SIMDE_FLOAT16_VALUE(47.79), SIMDE_FLOAT16_VALUE(45.49) }, + { SIMDE_FLOAT16_VALUE(-22.72), SIMDE_FLOAT16_VALUE(6.28), SIMDE_FLOAT16_VALUE(-7.03), 
SIMDE_FLOAT16_VALUE(41.06)}, + { { SIMDE_FLOAT16_VALUE(48.08), SIMDE_FLOAT16_VALUE(48.08), SIMDE_FLOAT16_VALUE(48.08), SIMDE_FLOAT16_VALUE(48.08) }, + { SIMDE_FLOAT16_VALUE(24.50), SIMDE_FLOAT16_VALUE(24.50), SIMDE_FLOAT16_VALUE(24.50), SIMDE_FLOAT16_VALUE(24.50) }, + { SIMDE_FLOAT16_VALUE(47.79), SIMDE_FLOAT16_VALUE(47.79), SIMDE_FLOAT16_VALUE(47.79), SIMDE_FLOAT16_VALUE(47.79) }, + { SIMDE_FLOAT16_VALUE(45.49), SIMDE_FLOAT16_VALUE(45.49), SIMDE_FLOAT16_VALUE(45.49), SIMDE_FLOAT16_VALUE(45.49) } } }, + { { SIMDE_FLOAT16_VALUE(41.90), SIMDE_FLOAT16_VALUE(-45.37), SIMDE_FLOAT16_VALUE(43.54), SIMDE_FLOAT16_VALUE(10.59) }, + { SIMDE_FLOAT16_VALUE(36.07), SIMDE_FLOAT16_VALUE(28.31), SIMDE_FLOAT16_VALUE(-11.82), SIMDE_FLOAT16_VALUE(-16.27)}, + { { SIMDE_FLOAT16_VALUE(41.90), SIMDE_FLOAT16_VALUE(41.90), SIMDE_FLOAT16_VALUE(41.90), SIMDE_FLOAT16_VALUE(41.90) }, + { SIMDE_FLOAT16_VALUE(-45.37), SIMDE_FLOAT16_VALUE(-45.37), SIMDE_FLOAT16_VALUE(-45.37), SIMDE_FLOAT16_VALUE(-45.37) }, + { SIMDE_FLOAT16_VALUE(43.54), SIMDE_FLOAT16_VALUE(43.54), SIMDE_FLOAT16_VALUE(43.54), SIMDE_FLOAT16_VALUE(43.54) }, + { SIMDE_FLOAT16_VALUE(10.59), SIMDE_FLOAT16_VALUE(10.59), SIMDE_FLOAT16_VALUE(10.59), SIMDE_FLOAT16_VALUE(10.59) } } }, + { { SIMDE_FLOAT16_VALUE(3.73), SIMDE_FLOAT16_VALUE(-41.80), SIMDE_FLOAT16_VALUE(16.45), SIMDE_FLOAT16_VALUE(28.24) }, + { SIMDE_FLOAT16_VALUE(14.07), SIMDE_FLOAT16_VALUE(-1.72), SIMDE_FLOAT16_VALUE(-31.48), SIMDE_FLOAT16_VALUE(-4.95)}, + { { SIMDE_FLOAT16_VALUE(3.73), SIMDE_FLOAT16_VALUE(3.73), SIMDE_FLOAT16_VALUE(3.73), SIMDE_FLOAT16_VALUE(3.73) }, + { SIMDE_FLOAT16_VALUE(-41.80), SIMDE_FLOAT16_VALUE(-41.80), SIMDE_FLOAT16_VALUE(-41.80), SIMDE_FLOAT16_VALUE(-41.80) }, + { SIMDE_FLOAT16_VALUE(16.45), SIMDE_FLOAT16_VALUE(16.45), SIMDE_FLOAT16_VALUE(16.45), SIMDE_FLOAT16_VALUE(16.45) }, + { SIMDE_FLOAT16_VALUE(28.24), SIMDE_FLOAT16_VALUE(28.24), SIMDE_FLOAT16_VALUE(28.24), SIMDE_FLOAT16_VALUE(28.24) } } }, + { { SIMDE_FLOAT16_VALUE(-14.57), 
SIMDE_FLOAT16_VALUE(15.78), SIMDE_FLOAT16_VALUE(3.71), SIMDE_FLOAT16_VALUE(-44.25) }, + { SIMDE_FLOAT16_VALUE(30.42), SIMDE_FLOAT16_VALUE(7.31), SIMDE_FLOAT16_VALUE(-31.88), SIMDE_FLOAT16_VALUE(23.92)}, + { { SIMDE_FLOAT16_VALUE(-14.57), SIMDE_FLOAT16_VALUE(-14.57), SIMDE_FLOAT16_VALUE(-14.57), SIMDE_FLOAT16_VALUE(-14.57) }, + { SIMDE_FLOAT16_VALUE(15.78), SIMDE_FLOAT16_VALUE(15.78), SIMDE_FLOAT16_VALUE(15.78), SIMDE_FLOAT16_VALUE(15.78) }, + { SIMDE_FLOAT16_VALUE(3.71), SIMDE_FLOAT16_VALUE(3.71), SIMDE_FLOAT16_VALUE(3.71), SIMDE_FLOAT16_VALUE(3.71) }, + { SIMDE_FLOAT16_VALUE(-44.25), SIMDE_FLOAT16_VALUE(-44.25), SIMDE_FLOAT16_VALUE(-44.25), SIMDE_FLOAT16_VALUE(-44.25) } } }, + { { SIMDE_FLOAT16_VALUE(-7.08), SIMDE_FLOAT16_VALUE(-13.75), SIMDE_FLOAT16_VALUE(-46.19), SIMDE_FLOAT16_VALUE(14.37) }, + { SIMDE_FLOAT16_VALUE(-44.62), SIMDE_FLOAT16_VALUE(-35.77), SIMDE_FLOAT16_VALUE(-19.37), SIMDE_FLOAT16_VALUE(18.25)}, + { { SIMDE_FLOAT16_VALUE(-7.08), SIMDE_FLOAT16_VALUE(-7.08), SIMDE_FLOAT16_VALUE(-7.08), SIMDE_FLOAT16_VALUE(-7.08) }, + { SIMDE_FLOAT16_VALUE(-13.75), SIMDE_FLOAT16_VALUE(-13.75), SIMDE_FLOAT16_VALUE(-13.75), SIMDE_FLOAT16_VALUE(-13.75) }, + { SIMDE_FLOAT16_VALUE(-46.19), SIMDE_FLOAT16_VALUE(-46.19), SIMDE_FLOAT16_VALUE(-46.19), SIMDE_FLOAT16_VALUE(-46.19) }, + { SIMDE_FLOAT16_VALUE(14.37), SIMDE_FLOAT16_VALUE(14.37), SIMDE_FLOAT16_VALUE(14.37), SIMDE_FLOAT16_VALUE(14.37) } } }, + { { SIMDE_FLOAT16_VALUE(-41.21), SIMDE_FLOAT16_VALUE(23.55), SIMDE_FLOAT16_VALUE(-20.74), SIMDE_FLOAT16_VALUE(31.91) }, + { SIMDE_FLOAT16_VALUE(-5.45), SIMDE_FLOAT16_VALUE(-43.69), SIMDE_FLOAT16_VALUE(1.07), SIMDE_FLOAT16_VALUE(-15.34)}, + { { SIMDE_FLOAT16_VALUE(-41.21), SIMDE_FLOAT16_VALUE(-41.21), SIMDE_FLOAT16_VALUE(-41.21), SIMDE_FLOAT16_VALUE(-41.21) }, + { SIMDE_FLOAT16_VALUE(23.55), SIMDE_FLOAT16_VALUE(23.55), SIMDE_FLOAT16_VALUE(23.55), SIMDE_FLOAT16_VALUE(23.55) }, + { SIMDE_FLOAT16_VALUE(-20.74), SIMDE_FLOAT16_VALUE(-20.74), SIMDE_FLOAT16_VALUE(-20.74), 
SIMDE_FLOAT16_VALUE(-20.74) }, + { SIMDE_FLOAT16_VALUE(31.91), SIMDE_FLOAT16_VALUE(31.91), SIMDE_FLOAT16_VALUE(31.91), SIMDE_FLOAT16_VALUE(31.91) } } }, + { { SIMDE_FLOAT16_VALUE(12.19), SIMDE_FLOAT16_VALUE(-39.62), SIMDE_FLOAT16_VALUE(26.35), SIMDE_FLOAT16_VALUE(24.05) }, + { SIMDE_FLOAT16_VALUE(14.14), SIMDE_FLOAT16_VALUE(-6.68), SIMDE_FLOAT16_VALUE(-34.58), SIMDE_FLOAT16_VALUE(17.64)}, + { { SIMDE_FLOAT16_VALUE(12.19), SIMDE_FLOAT16_VALUE(12.19), SIMDE_FLOAT16_VALUE(12.19), SIMDE_FLOAT16_VALUE(12.19) }, + { SIMDE_FLOAT16_VALUE(-39.62), SIMDE_FLOAT16_VALUE(-39.62), SIMDE_FLOAT16_VALUE(-39.62), SIMDE_FLOAT16_VALUE(-39.62) }, + { SIMDE_FLOAT16_VALUE(26.35), SIMDE_FLOAT16_VALUE(26.35), SIMDE_FLOAT16_VALUE(26.35), SIMDE_FLOAT16_VALUE(26.35) }, + { SIMDE_FLOAT16_VALUE(24.05), SIMDE_FLOAT16_VALUE(24.05), SIMDE_FLOAT16_VALUE(24.05), SIMDE_FLOAT16_VALUE(24.05) } } }, + { { SIMDE_FLOAT16_VALUE(32.66), SIMDE_FLOAT16_VALUE(22.65), SIMDE_FLOAT16_VALUE(-23.52), SIMDE_FLOAT16_VALUE(8.31) }, + { SIMDE_FLOAT16_VALUE(-26.83), SIMDE_FLOAT16_VALUE(-6.25), SIMDE_FLOAT16_VALUE(38.29), SIMDE_FLOAT16_VALUE(-48.88)}, + { { SIMDE_FLOAT16_VALUE(32.66), SIMDE_FLOAT16_VALUE(32.66), SIMDE_FLOAT16_VALUE(32.66), SIMDE_FLOAT16_VALUE(32.66) }, + { SIMDE_FLOAT16_VALUE(22.65), SIMDE_FLOAT16_VALUE(22.65), SIMDE_FLOAT16_VALUE(22.65), SIMDE_FLOAT16_VALUE(22.65) }, + { SIMDE_FLOAT16_VALUE(-23.52), SIMDE_FLOAT16_VALUE(-23.52), SIMDE_FLOAT16_VALUE(-23.52), SIMDE_FLOAT16_VALUE(-23.52) }, + { SIMDE_FLOAT16_VALUE(8.31), SIMDE_FLOAT16_VALUE(8.31), SIMDE_FLOAT16_VALUE(8.31), SIMDE_FLOAT16_VALUE(8.31) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x4_t r = simde_vld4_dup_f16(test_vec[i].a); + simde_float16x4x4_t expected = { + {simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1]), + simde_vld1_f16(test_vec[i].r[2]), simde_vld1_f16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); + 
simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld4_dup_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[4]; + float unused[4]; + float r[4][2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-4866.18), SIMDE_FLOAT32_C(3024.56), SIMDE_FLOAT32_C(4169.66), SIMDE_FLOAT32_C(3637.17) }, + { SIMDE_FLOAT32_C(-555.46), SIMDE_FLOAT32_C(-1432.96), SIMDE_FLOAT32_C(3626.01), SIMDE_FLOAT32_C(2448.25)}, + { { SIMDE_FLOAT32_C(-4866.18), SIMDE_FLOAT32_C(-4866.18) }, + { SIMDE_FLOAT32_C(3024.56), SIMDE_FLOAT32_C(3024.56) }, + { SIMDE_FLOAT32_C(4169.66), SIMDE_FLOAT32_C(4169.66) }, + { SIMDE_FLOAT32_C(3637.17), SIMDE_FLOAT32_C(3637.17) } } }, + { { SIMDE_FLOAT32_C(2885.92), SIMDE_FLOAT32_C(-4536.63), SIMDE_FLOAT32_C(-4711.82), SIMDE_FLOAT32_C(-1956.08) }, + { SIMDE_FLOAT32_C(2407.68), SIMDE_FLOAT32_C(-2237.55), SIMDE_FLOAT32_C(-880.79), SIMDE_FLOAT32_C(3290.22)}, + { { SIMDE_FLOAT32_C(2885.92), SIMDE_FLOAT32_C(2885.92) }, + { SIMDE_FLOAT32_C(-4536.63), SIMDE_FLOAT32_C(-4536.63) }, + { SIMDE_FLOAT32_C(-4711.82), SIMDE_FLOAT32_C(-4711.82) }, + { SIMDE_FLOAT32_C(-1956.08), SIMDE_FLOAT32_C(-1956.08) } } }, + { { SIMDE_FLOAT32_C(-3180.23), SIMDE_FLOAT32_C(4861.05), SIMDE_FLOAT32_C(-1648.83), SIMDE_FLOAT32_C(-2068.69) }, + { SIMDE_FLOAT32_C(-110.56), SIMDE_FLOAT32_C(4643.01), SIMDE_FLOAT32_C(-3461.93), SIMDE_FLOAT32_C(-3173.78)}, + { { SIMDE_FLOAT32_C(-3180.23), SIMDE_FLOAT32_C(-3180.23) }, + { SIMDE_FLOAT32_C(4861.05), SIMDE_FLOAT32_C(4861.05) }, + { SIMDE_FLOAT32_C(-1648.83), SIMDE_FLOAT32_C(-1648.83) }, + { SIMDE_FLOAT32_C(-2068.69), SIMDE_FLOAT32_C(-2068.69) } } }, + { { SIMDE_FLOAT32_C(-3256.85), SIMDE_FLOAT32_C(-2010.85), SIMDE_FLOAT32_C(1399.90), SIMDE_FLOAT32_C(846.03) }, + { SIMDE_FLOAT32_C(3968.20), SIMDE_FLOAT32_C(-4075.92), 
SIMDE_FLOAT32_C(4628.91), SIMDE_FLOAT32_C(290.69)}, + { { SIMDE_FLOAT32_C(-3256.85), SIMDE_FLOAT32_C(-3256.85) }, + { SIMDE_FLOAT32_C(-2010.85), SIMDE_FLOAT32_C(-2010.85) }, + { SIMDE_FLOAT32_C(1399.90), SIMDE_FLOAT32_C(1399.90) }, + { SIMDE_FLOAT32_C(846.03), SIMDE_FLOAT32_C(846.03) } } }, + { { SIMDE_FLOAT32_C(1551.81), SIMDE_FLOAT32_C(-4234.91), SIMDE_FLOAT32_C(966.88), SIMDE_FLOAT32_C(-4750.78) }, + { SIMDE_FLOAT32_C(-4045.59), SIMDE_FLOAT32_C(-77.94), SIMDE_FLOAT32_C(-4706.59), SIMDE_FLOAT32_C(3025.78)}, + { { SIMDE_FLOAT32_C(1551.81), SIMDE_FLOAT32_C(1551.81) }, + { SIMDE_FLOAT32_C(-4234.91), SIMDE_FLOAT32_C(-4234.91) }, + { SIMDE_FLOAT32_C(966.88), SIMDE_FLOAT32_C(966.88) }, + { SIMDE_FLOAT32_C(-4750.78), SIMDE_FLOAT32_C(-4750.78) } } }, + { { SIMDE_FLOAT32_C(3149.37), SIMDE_FLOAT32_C(-1958.05), SIMDE_FLOAT32_C(45.43), SIMDE_FLOAT32_C(4005.09) }, + { SIMDE_FLOAT32_C(-4245.02), SIMDE_FLOAT32_C(3122.67), SIMDE_FLOAT32_C(-236.45), SIMDE_FLOAT32_C(1584.15)}, + { { SIMDE_FLOAT32_C(3149.37), SIMDE_FLOAT32_C(3149.37) }, + { SIMDE_FLOAT32_C(-1958.05), SIMDE_FLOAT32_C(-1958.05) }, + { SIMDE_FLOAT32_C(45.43), SIMDE_FLOAT32_C(45.43) }, + { SIMDE_FLOAT32_C(4005.09), SIMDE_FLOAT32_C(4005.09) } } }, + { { SIMDE_FLOAT32_C(-1387.85), SIMDE_FLOAT32_C(-386.78), SIMDE_FLOAT32_C(-3491.79), SIMDE_FLOAT32_C(1968.50) }, + { SIMDE_FLOAT32_C(-4637.25), SIMDE_FLOAT32_C(235.82), SIMDE_FLOAT32_C(4731.15), SIMDE_FLOAT32_C(1932.12)}, + { { SIMDE_FLOAT32_C(-1387.85), SIMDE_FLOAT32_C(-1387.85) }, + { SIMDE_FLOAT32_C(-386.78), SIMDE_FLOAT32_C(-386.78) }, + { SIMDE_FLOAT32_C(-3491.79), SIMDE_FLOAT32_C(-3491.79) }, + { SIMDE_FLOAT32_C(1968.50), SIMDE_FLOAT32_C(1968.50) } } }, + { { SIMDE_FLOAT32_C(-2629.13), SIMDE_FLOAT32_C(-3081.26), SIMDE_FLOAT32_C(271.35), SIMDE_FLOAT32_C(1861.69) }, + { SIMDE_FLOAT32_C(2693.30), SIMDE_FLOAT32_C(120.85), SIMDE_FLOAT32_C(-170.49), SIMDE_FLOAT32_C(980.19)}, + { { SIMDE_FLOAT32_C(-2629.13), SIMDE_FLOAT32_C(-2629.13) }, + { SIMDE_FLOAT32_C(-3081.26), 
SIMDE_FLOAT32_C(-3081.26) }, + { SIMDE_FLOAT32_C(271.35), SIMDE_FLOAT32_C(271.35) }, + { SIMDE_FLOAT32_C(1861.69), SIMDE_FLOAT32_C(1861.69) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2x4_t r = simde_vld4_dup_f32(test_vec[i].a); + simde_float32x2x4_t expected = { + {simde_vld1_f32(test_vec[i].r[0]), simde_vld1_f32(test_vec[i].r[1]), + simde_vld1_f32(test_vec[i].r[2]), simde_vld1_f32(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld4_dup_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[4]; + simde_float64 unused[4]; + simde_float64 r[4][1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(445851.81), SIMDE_FLOAT64_C(-330960.40), SIMDE_FLOAT64_C(309250.45), SIMDE_FLOAT64_C(236747.13) }, + { SIMDE_FLOAT64_C(-499126.60), SIMDE_FLOAT64_C(-369819.01), SIMDE_FLOAT64_C(-410746.00), SIMDE_FLOAT64_C(268713.23)}, + { { SIMDE_FLOAT64_C(445851.81) }, + { SIMDE_FLOAT64_C(-330960.40) }, + { SIMDE_FLOAT64_C(309250.45) }, + { SIMDE_FLOAT64_C(236747.13) } } }, + { { SIMDE_FLOAT64_C(128919.66), SIMDE_FLOAT64_C(-78616.76), SIMDE_FLOAT64_C(-118926.72), SIMDE_FLOAT64_C(463558.82) }, + { SIMDE_FLOAT64_C(269786.10), SIMDE_FLOAT64_C(344879.51), SIMDE_FLOAT64_C(-492649.86), SIMDE_FLOAT64_C(-294401.80)}, + { { SIMDE_FLOAT64_C(128919.66) }, + { SIMDE_FLOAT64_C(-78616.76) }, + { SIMDE_FLOAT64_C(-118926.72) }, + { SIMDE_FLOAT64_C(463558.82) } } }, + { { SIMDE_FLOAT64_C(-436676.59), SIMDE_FLOAT64_C(55808.72), SIMDE_FLOAT64_C(103053.10), SIMDE_FLOAT64_C(-68771.59) }, + { SIMDE_FLOAT64_C(-219554.42), SIMDE_FLOAT64_C(-365472.84), SIMDE_FLOAT64_C(-234535.24), SIMDE_FLOAT64_C(-485480.60)}, + { { 
SIMDE_FLOAT64_C(-436676.59) }, + { SIMDE_FLOAT64_C(55808.72) }, + { SIMDE_FLOAT64_C(103053.10) }, + { SIMDE_FLOAT64_C(-68771.59) } } }, + { { SIMDE_FLOAT64_C(236460.67), SIMDE_FLOAT64_C(120005.12), SIMDE_FLOAT64_C(401078.09), SIMDE_FLOAT64_C(144330.41) }, + { SIMDE_FLOAT64_C(-290342.75), SIMDE_FLOAT64_C(-428984.35), SIMDE_FLOAT64_C(481548.16), SIMDE_FLOAT64_C(-338674.87)}, + { { SIMDE_FLOAT64_C(236460.67) }, + { SIMDE_FLOAT64_C(120005.12) }, + { SIMDE_FLOAT64_C(401078.09) }, + { SIMDE_FLOAT64_C(144330.41) } } }, + { { SIMDE_FLOAT64_C(-219732.53), SIMDE_FLOAT64_C(126809.41), SIMDE_FLOAT64_C(417292.34), SIMDE_FLOAT64_C(-172476.55) }, + { SIMDE_FLOAT64_C(-365825.55), SIMDE_FLOAT64_C(430799.77), SIMDE_FLOAT64_C(220000.75), SIMDE_FLOAT64_C(-29407.47)}, + { { SIMDE_FLOAT64_C(-219732.53) }, + { SIMDE_FLOAT64_C(126809.41) }, + { SIMDE_FLOAT64_C(417292.34) }, + { SIMDE_FLOAT64_C(-172476.55) } } }, + { { SIMDE_FLOAT64_C(196876.79), SIMDE_FLOAT64_C(-119084.66), SIMDE_FLOAT64_C(-283899.71), SIMDE_FLOAT64_C(-398048.79) }, + { SIMDE_FLOAT64_C(-63597.93), SIMDE_FLOAT64_C(-490513.69), SIMDE_FLOAT64_C(221360.38), SIMDE_FLOAT64_C(306089.64)}, + { { SIMDE_FLOAT64_C(196876.79) }, + { SIMDE_FLOAT64_C(-119084.66) }, + { SIMDE_FLOAT64_C(-283899.71) }, + { SIMDE_FLOAT64_C(-398048.79) } } }, + { { SIMDE_FLOAT64_C(29349.60), SIMDE_FLOAT64_C(-332457.61), SIMDE_FLOAT64_C(174407.67), SIMDE_FLOAT64_C(-266917.65) }, + { SIMDE_FLOAT64_C(-134655.15), SIMDE_FLOAT64_C(201659.83), SIMDE_FLOAT64_C(310984.82), SIMDE_FLOAT64_C(382838.92)}, + { { SIMDE_FLOAT64_C(29349.60) }, + { SIMDE_FLOAT64_C(-332457.61) }, + { SIMDE_FLOAT64_C(174407.67) }, + { SIMDE_FLOAT64_C(-266917.65) } } }, + { { SIMDE_FLOAT64_C(-451734.93), SIMDE_FLOAT64_C(-9569.57), SIMDE_FLOAT64_C(271001.72), SIMDE_FLOAT64_C(-452907.74) }, + { SIMDE_FLOAT64_C(-167433.41), SIMDE_FLOAT64_C(322890.62), SIMDE_FLOAT64_C(-216738.19), SIMDE_FLOAT64_C(483419.12)}, + { { SIMDE_FLOAT64_C(-451734.93) }, + { SIMDE_FLOAT64_C(-9569.57) }, + { 
SIMDE_FLOAT64_C(271001.72) }, + { SIMDE_FLOAT64_C(-452907.74) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1x4_t r = simde_vld4_dup_f64(test_vec[i].a); + simde_float64x1x4_t expected = { + {simde_vld1_f64(test_vec[i].r[0]), simde_vld1_f64(test_vec[i].r[1]), + simde_vld1_f64(test_vec[i].r[2]), simde_vld1_f64(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld4_dup_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[4]; + int8_t unused[4]; + int8_t r[4][8]; + } test_vec[] = { + { { INT8_C(68), -INT8_C(10), INT8_C(21), -INT8_C(89) }, + { INT8_C(75), -INT8_C(86), -INT8_C(69), -INT8_C(58)}, + { { INT8_C(68), INT8_C(68), INT8_C(68), INT8_C(68), + INT8_C(68), INT8_C(68), INT8_C(68), INT8_C(68) }, + { -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10), + -INT8_C(10), -INT8_C(10), -INT8_C(10), -INT8_C(10) }, + { INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21), + INT8_C(21), INT8_C(21), INT8_C(21), INT8_C(21) }, + { -INT8_C(89), -INT8_C(89), -INT8_C(89), -INT8_C(89), + -INT8_C(89), -INT8_C(89), -INT8_C(89), -INT8_C(89) } } }, + { { INT8_C(4), -INT8_C(3), INT8_C(0), INT8_C(14) }, + { INT8_C(91), -INT8_C(28), INT8_C(90), INT8_C(82)}, + { { INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4), + INT8_C(4), INT8_C(4), INT8_C(4), INT8_C(4) }, + { -INT8_C(3), -INT8_C(3), -INT8_C(3), -INT8_C(3), + -INT8_C(3), -INT8_C(3), -INT8_C(3), -INT8_C(3) }, + { INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), + INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0) }, + { INT8_C(14), INT8_C(14), INT8_C(14), INT8_C(14), + INT8_C(14), INT8_C(14), INT8_C(14), INT8_C(14) } } }, + { { -INT8_C(45), -INT8_C(54), -INT8_C(38), 
INT8_C(76) }, + { INT8_C(47), INT8_C(56), -INT8_C(84), INT8_C(77)}, + { { -INT8_C(45), -INT8_C(45), -INT8_C(45), -INT8_C(45), + -INT8_C(45), -INT8_C(45), -INT8_C(45), -INT8_C(45) }, + { -INT8_C(54), -INT8_C(54), -INT8_C(54), -INT8_C(54), + -INT8_C(54), -INT8_C(54), -INT8_C(54), -INT8_C(54) }, + { -INT8_C(38), -INT8_C(38), -INT8_C(38), -INT8_C(38), + -INT8_C(38), -INT8_C(38), -INT8_C(38), -INT8_C(38) }, + { INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), + INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76) } } }, + { { INT8_C(58), -INT8_C(36), -INT8_C(36), -INT8_C(12) }, + { -INT8_C(65), INT8_C(81), INT8_C(42), INT8_C(83)}, + { { INT8_C(58), INT8_C(58), INT8_C(58), INT8_C(58), + INT8_C(58), INT8_C(58), INT8_C(58), INT8_C(58) }, + { -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), + -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36) }, + { -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36), + -INT8_C(36), -INT8_C(36), -INT8_C(36), -INT8_C(36) }, + { -INT8_C(12), -INT8_C(12), -INT8_C(12), -INT8_C(12), + -INT8_C(12), -INT8_C(12), -INT8_C(12), -INT8_C(12) } } }, + { { INT8_C(59), INT8_C(19), INT8_C(19), -INT8_C(70) }, + { INT8_C(39), -INT8_C(22), -INT8_C(45), INT8_C(85)}, + { { INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), + INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59) }, + { INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), + INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19) }, + { INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), + INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19) }, + { -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70), + -INT8_C(70), -INT8_C(70), -INT8_C(70), -INT8_C(70) } } }, + { { -INT8_C(61), -INT8_C(4), -INT8_C(96), -INT8_C(27) }, + { INT8_C(5), INT8_C(13), INT8_C(35), INT8_C(16)}, + { { -INT8_C(61), -INT8_C(61), -INT8_C(61), -INT8_C(61), + -INT8_C(61), -INT8_C(61), -INT8_C(61), -INT8_C(61) }, + { -INT8_C(4), -INT8_C(4), -INT8_C(4), -INT8_C(4), + -INT8_C(4), -INT8_C(4), -INT8_C(4), -INT8_C(4) }, + { -INT8_C(96), -INT8_C(96), -INT8_C(96), -INT8_C(96), 
+ -INT8_C(96), -INT8_C(96), -INT8_C(96), -INT8_C(96) }, + { -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27), + -INT8_C(27), -INT8_C(27), -INT8_C(27), -INT8_C(27) } } }, + { { -INT8_C(14), INT8_C(0), INT8_C(90), -INT8_C(64) }, + { -INT8_C(16), -INT8_C(49), INT8_C(66), -INT8_C(88)}, + { { -INT8_C(14), -INT8_C(14), -INT8_C(14), -INT8_C(14), + -INT8_C(14), -INT8_C(14), -INT8_C(14), -INT8_C(14) }, + { INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), + INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0) }, + { INT8_C(90), INT8_C(90), INT8_C(90), INT8_C(90), + INT8_C(90), INT8_C(90), INT8_C(90), INT8_C(90) }, + { -INT8_C(64), -INT8_C(64), -INT8_C(64), -INT8_C(64), + -INT8_C(64), -INT8_C(64), -INT8_C(64), -INT8_C(64) } } }, + { { INT8_C(12), INT8_C(19), -INT8_C(31), INT8_C(73) }, + { INT8_C(68), -INT8_C(22), -INT8_C(73), -INT8_C(35)}, + { { INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12), + INT8_C(12), INT8_C(12), INT8_C(12), INT8_C(12) }, + { INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), + INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19) }, + { -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31), + -INT8_C(31), -INT8_C(31), -INT8_C(31), -INT8_C(31) }, + { INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), + INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8x4_t r = simde_vld4_dup_s8(test_vec[i].a); + simde_int8x8x4_t expected = { + {simde_vld1_s8(test_vec[i].r[0]), simde_vld1_s8(test_vec[i].r[1]), + simde_vld1_s8(test_vec[i].r[2]), simde_vld1_s8(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x8(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i8x8(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4_dup_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t unused[4]; + int16_t 
r[4][4]; + } test_vec[] = { + { { -INT16_C(1402), -INT16_C(4269), INT16_C(9722), INT16_C(6315) }, + { -INT16_C(6328), -INT16_C(6730), -INT16_C(363), -INT16_C(2341)}, + { { -INT16_C(1402), -INT16_C(1402), -INT16_C(1402), -INT16_C(1402) }, + { -INT16_C(4269), -INT16_C(4269), -INT16_C(4269), -INT16_C(4269) }, + { INT16_C(9722), INT16_C(9722), INT16_C(9722), INT16_C(9722) }, + { INT16_C(6315), INT16_C(6315), INT16_C(6315), INT16_C(6315) } } }, + { { -INT16_C(6055), INT16_C(9968), -INT16_C(8842), -INT16_C(8554) }, + { -INT16_C(1679), -INT16_C(8145), -INT16_C(3034), INT16_C(7239)}, + { { -INT16_C(6055), -INT16_C(6055), -INT16_C(6055), -INT16_C(6055) }, + { INT16_C(9968), INT16_C(9968), INT16_C(9968), INT16_C(9968) }, + { -INT16_C(8842), -INT16_C(8842), -INT16_C(8842), -INT16_C(8842) }, + { -INT16_C(8554), -INT16_C(8554), -INT16_C(8554), -INT16_C(8554) } } }, + { { INT16_C(9981), -INT16_C(5549), INT16_C(7977), INT16_C(4465) }, + { INT16_C(2194), INT16_C(1398), INT16_C(7386), -INT16_C(5978)}, + { { INT16_C(9981), INT16_C(9981), INT16_C(9981), INT16_C(9981) }, + { -INT16_C(5549), -INT16_C(5549), -INT16_C(5549), -INT16_C(5549) }, + { INT16_C(7977), INT16_C(7977), INT16_C(7977), INT16_C(7977) }, + { INT16_C(4465), INT16_C(4465), INT16_C(4465), INT16_C(4465) } } }, + { { INT16_C(1524), INT16_C(3583), INT16_C(3051), -INT16_C(4914) }, + { INT16_C(7240), -INT16_C(9368), INT16_C(4797), -INT16_C(928)}, + { { INT16_C(1524), INT16_C(1524), INT16_C(1524), INT16_C(1524) }, + { INT16_C(3583), INT16_C(3583), INT16_C(3583), INT16_C(3583) }, + { INT16_C(3051), INT16_C(3051), INT16_C(3051), INT16_C(3051) }, + { -INT16_C(4914), -INT16_C(4914), -INT16_C(4914), -INT16_C(4914) } } }, + { { INT16_C(3773), INT16_C(2624), -INT16_C(3277), INT16_C(1755) }, + { -INT16_C(1621), INT16_C(4772), INT16_C(1264), -INT16_C(7704)}, + { { INT16_C(3773), INT16_C(3773), INT16_C(3773), INT16_C(3773) }, + { INT16_C(2624), INT16_C(2624), INT16_C(2624), INT16_C(2624) }, + { -INT16_C(3277), -INT16_C(3277), 
-INT16_C(3277), -INT16_C(3277) }, + { INT16_C(1755), INT16_C(1755), INT16_C(1755), INT16_C(1755) } } }, + { { INT16_C(5286), INT16_C(2345), -INT16_C(5987), -INT16_C(2753) }, + { INT16_C(6664), -INT16_C(2286), -INT16_C(7865), -INT16_C(1672)}, + { { INT16_C(5286), INT16_C(5286), INT16_C(5286), INT16_C(5286) }, + { INT16_C(2345), INT16_C(2345), INT16_C(2345), INT16_C(2345) }, + { -INT16_C(5987), -INT16_C(5987), -INT16_C(5987), -INT16_C(5987) }, + { -INT16_C(2753), -INT16_C(2753), -INT16_C(2753), -INT16_C(2753) } } }, + { { INT16_C(2561), -INT16_C(9303), INT16_C(1061), -INT16_C(7213) }, + { -INT16_C(881), -INT16_C(3042), -INT16_C(8610), INT16_C(7325)}, + { { INT16_C(2561), INT16_C(2561), INT16_C(2561), INT16_C(2561) }, + { -INT16_C(9303), -INT16_C(9303), -INT16_C(9303), -INT16_C(9303) }, + { INT16_C(1061), INT16_C(1061), INT16_C(1061), INT16_C(1061) }, + { -INT16_C(7213), -INT16_C(7213), -INT16_C(7213), -INT16_C(7213) } } }, + { { -INT16_C(7579), -INT16_C(9735), INT16_C(7966), -INT16_C(6710) }, + { INT16_C(4700), INT16_C(1443), -INT16_C(9728), INT16_C(4236)}, + { { -INT16_C(7579), -INT16_C(7579), -INT16_C(7579), -INT16_C(7579) }, + { -INT16_C(9735), -INT16_C(9735), -INT16_C(9735), -INT16_C(9735) }, + { INT16_C(7966), INT16_C(7966), INT16_C(7966), INT16_C(7966) }, + { -INT16_C(6710), -INT16_C(6710), -INT16_C(6710), -INT16_C(6710) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4x4_t r = simde_vld4_dup_s16(test_vec[i].a); + simde_int16x4x4_t expected = { + {simde_vld1_s16(test_vec[i].r[0]), simde_vld1_s16(test_vec[i].r[1]), + simde_vld1_s16(test_vec[i].r[2]), simde_vld1_s16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i16x4(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i16x4(r.val[3], expected.val[3]); + } + + return 0; +} + +static int 
+test_simde_vld4_dup_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t unused[4]; + int32_t r[4][2]; + } test_vec[] = { + { { -INT32_C(652086), INT32_C(830115), INT32_C(895273), INT32_C(232307) }, + { INT32_C(514922), INT32_C(559641), INT32_C(719002), -INT32_C(522640)}, + { { -INT32_C(652086), -INT32_C(652086) }, + { INT32_C(830115), INT32_C(830115) }, + { INT32_C(895273), INT32_C(895273) }, + { INT32_C(232307), INT32_C(232307) } } }, + { { INT32_C(82629), -INT32_C(89616), -INT32_C(760319), -INT32_C(327798) }, + { INT32_C(554229), INT32_C(682786), -INT32_C(465184), -INT32_C(155186)}, + { { INT32_C(82629), INT32_C(82629) }, + { -INT32_C(89616), -INT32_C(89616) }, + { -INT32_C(760319), -INT32_C(760319) }, + { -INT32_C(327798), -INT32_C(327798) } } }, + { { INT32_C(615756), INT32_C(857632), INT32_C(581076), -INT32_C(492986) }, + { INT32_C(528868), INT32_C(762087), -INT32_C(681106), INT32_C(872087)}, + { { INT32_C(615756), INT32_C(615756) }, + { INT32_C(857632), INT32_C(857632) }, + { INT32_C(581076), INT32_C(581076) }, + { -INT32_C(492986), -INT32_C(492986) } } }, + { { INT32_C(561833), INT32_C(921404), -INT32_C(122567), -INT32_C(732579) }, + { INT32_C(430965), -INT32_C(874932), -INT32_C(757332), INT32_C(938068)}, + { { INT32_C(561833), INT32_C(561833) }, + { INT32_C(921404), INT32_C(921404) }, + { -INT32_C(122567), -INT32_C(122567) }, + { -INT32_C(732579), -INT32_C(732579) } } }, + { { -INT32_C(939154), INT32_C(381182), INT32_C(713323), INT32_C(605166) }, + { -INT32_C(796256), INT32_C(261174), -INT32_C(632162), -INT32_C(411330)}, + { { -INT32_C(939154), -INT32_C(939154) }, + { INT32_C(381182), INT32_C(381182) }, + { INT32_C(713323), INT32_C(713323) }, + { INT32_C(605166), INT32_C(605166) } } }, + { { INT32_C(841578), -INT32_C(117022), INT32_C(195746), INT32_C(170810) }, + { INT32_C(250486), -INT32_C(118087), INT32_C(147822), -INT32_C(446251)}, + { { INT32_C(841578), INT32_C(841578) }, + { -INT32_C(117022), -INT32_C(117022) }, + { 
INT32_C(195746), INT32_C(195746) }, + { INT32_C(170810), INT32_C(170810) } } }, + { { INT32_C(625951), -INT32_C(434473), -INT32_C(487392), INT32_C(450871) }, + { -INT32_C(21248), -INT32_C(49978), -INT32_C(870372), INT32_C(829835)}, + { { INT32_C(625951), INT32_C(625951) }, + { -INT32_C(434473), -INT32_C(434473) }, + { -INT32_C(487392), -INT32_C(487392) }, + { INT32_C(450871), INT32_C(450871) } } }, + { { -INT32_C(233278), INT32_C(485494), INT32_C(80564), INT32_C(415117) }, + { INT32_C(663911), INT32_C(238735), -INT32_C(926064), -INT32_C(519217)}, + { { -INT32_C(233278), -INT32_C(233278) }, + { INT32_C(485494), INT32_C(485494) }, + { INT32_C(80564), INT32_C(80564) }, + { INT32_C(415117), INT32_C(415117) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2x4_t r = simde_vld4_dup_s32(test_vec[i].a); + simde_int32x2x4_t expected = { + {simde_vld1_s32(test_vec[i].r[0]), simde_vld1_s32(test_vec[i].r[1]), + simde_vld1_s32(test_vec[i].r[2]), simde_vld1_s32(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i32x2(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i32x2(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4_dup_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[4]; + int64_t unused[4]; + int64_t r[4][1]; + } test_vec[] = { + { { -INT64_C(56733020), -INT64_C(17318014), INT64_C(12078569), INT64_C(82612068) }, + { -INT64_C(20859555), INT64_C(24737019), -INT64_C(20445337), INT64_C(67301356)}, + { { -INT64_C(56733020) }, + { -INT64_C(17318014) }, + { INT64_C(12078569) }, + { INT64_C(82612068) } } }, + { { INT64_C(1202527), INT64_C(4559604), INT64_C(14768409), -INT64_C(2188024) }, + { -INT64_C(21310606), INT64_C(5729689), -INT64_C(99218139), -INT64_C(6040)}, + { { INT64_C(1202527) }, + { INT64_C(4559604) }, + { 
INT64_C(14768409) }, + { -INT64_C(2188024) } } }, + { { -INT64_C(75916173), -INT64_C(36700612), -INT64_C(62050202), INT64_C(75384012) }, + { -INT64_C(89468615), -INT64_C(42608876), INT64_C(11124466), INT64_C(56397635)}, + { { -INT64_C(75916173) }, + { -INT64_C(36700612) }, + { -INT64_C(62050202) }, + { INT64_C(75384012) } } }, + { { INT64_C(76939592), -INT64_C(95421372), INT64_C(45337248), -INT64_C(57030409) }, + { INT64_C(49021299), -INT64_C(76456874), -INT64_C(18305372), INT64_C(85721942)}, + { { INT64_C(76939592) }, + { -INT64_C(95421372) }, + { INT64_C(45337248) }, + { -INT64_C(57030409) } } }, + { { -INT64_C(86501396), INT64_C(88982191), -INT64_C(84306578), INT64_C(13244094) }, + { INT64_C(92227627), INT64_C(69395035), INT64_C(98608750), INT64_C(37417231)}, + { { -INT64_C(86501396) }, + { INT64_C(88982191) }, + { -INT64_C(84306578) }, + { INT64_C(13244094) } } }, + { { -INT64_C(87804813), -INT64_C(41441843), -INT64_C(85279399), INT64_C(49312807) }, + { INT64_C(20569053), INT64_C(13178173), INT64_C(24706835), INT64_C(40884278)}, + { { -INT64_C(87804813) }, + { -INT64_C(41441843) }, + { -INT64_C(85279399) }, + { INT64_C(49312807) } } }, + { { -INT64_C(20293605), -INT64_C(9407517), INT64_C(48222736), -INT64_C(54034755) }, + { -INT64_C(87497232), -INT64_C(49279753), INT64_C(94490130), INT64_C(30019721)}, + { { -INT64_C(20293605) }, + { -INT64_C(9407517) }, + { INT64_C(48222736) }, + { -INT64_C(54034755) } } }, + { { -INT64_C(91077708), INT64_C(43106202), INT64_C(30721388), -INT64_C(92913395) }, + { INT64_C(86136901), INT64_C(40220309), -INT64_C(67313051), INT64_C(57922186)}, + { { -INT64_C(91077708) }, + { INT64_C(43106202) }, + { INT64_C(30721388) }, + { -INT64_C(92913395) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1x4_t r = simde_vld4_dup_s64(test_vec[i].a); + simde_int64x1x4_t expected = { + {simde_vld1_s64(test_vec[i].r[0]), simde_vld1_s64(test_vec[i].r[1]), + simde_vld1_s64(test_vec[i].r[2]), 
simde_vld1_s64(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i64x1(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i64x1(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4_dup_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[4]; + uint8_t unused[4]; + uint8_t r[4][8]; + } test_vec[] = { + { { UINT8_C(113), UINT8_C(73), UINT8_C(82), UINT8_C(124) }, + { UINT8_C(97), UINT8_C(44), UINT8_C(46), UINT8_C(6)}, + { { UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113), + UINT8_C(113), UINT8_C(113), UINT8_C(113), UINT8_C(113) }, + { UINT8_C(73), UINT8_C(73), UINT8_C(73), UINT8_C(73), + UINT8_C(73), UINT8_C(73), UINT8_C(73), UINT8_C(73) }, + { UINT8_C(82), UINT8_C(82), UINT8_C(82), UINT8_C(82), + UINT8_C(82), UINT8_C(82), UINT8_C(82), UINT8_C(82) }, + { UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), + UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124) } } }, + { { UINT8_C(123), UINT8_C(99), UINT8_C(73), UINT8_C(31) }, + { UINT8_C(12), UINT8_C(196), UINT8_C(169), UINT8_C(0)}, + { { UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), + UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123) }, + { UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), + UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99) }, + { UINT8_C(73), UINT8_C(73), UINT8_C(73), UINT8_C(73), + UINT8_C(73), UINT8_C(73), UINT8_C(73), UINT8_C(73) }, + { UINT8_C(31), UINT8_C(31), UINT8_C(31), UINT8_C(31), + UINT8_C(31), UINT8_C(31), UINT8_C(31), UINT8_C(31) } } }, + { { UINT8_C(49), UINT8_C(131), UINT8_C(102), UINT8_C(104) }, + { UINT8_C(85), UINT8_C(1), UINT8_C(54), UINT8_C(25)}, + { { UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), + UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49) }, + { UINT8_C(131), UINT8_C(131), UINT8_C(131), UINT8_C(131), + UINT8_C(131), UINT8_C(131), 
UINT8_C(131), UINT8_C(131) }, + { UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102), + UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102) }, + { UINT8_C(104), UINT8_C(104), UINT8_C(104), UINT8_C(104), + UINT8_C(104), UINT8_C(104), UINT8_C(104), UINT8_C(104) } } }, + { { UINT8_C(188), UINT8_C(156), UINT8_C(48), UINT8_C(9) }, + { UINT8_C(54), UINT8_C(21), UINT8_C(63), UINT8_C(114)}, + { { UINT8_C(188), UINT8_C(188), UINT8_C(188), UINT8_C(188), + UINT8_C(188), UINT8_C(188), UINT8_C(188), UINT8_C(188) }, + { UINT8_C(156), UINT8_C(156), UINT8_C(156), UINT8_C(156), + UINT8_C(156), UINT8_C(156), UINT8_C(156), UINT8_C(156) }, + { UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), + UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48) }, + { UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9), + UINT8_C(9), UINT8_C(9), UINT8_C(9), UINT8_C(9) } } }, + { { UINT8_C(110), UINT8_C(3), UINT8_C(48), UINT8_C(70) }, + { UINT8_C(139), UINT8_C(154), UINT8_C(160), UINT8_C(139)}, + { { UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110), + UINT8_C(110), UINT8_C(110), UINT8_C(110), UINT8_C(110) }, + { UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3), + UINT8_C(3), UINT8_C(3), UINT8_C(3), UINT8_C(3) }, + { UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48), + UINT8_C(48), UINT8_C(48), UINT8_C(48), UINT8_C(48) }, + { UINT8_C(70), UINT8_C(70), UINT8_C(70), UINT8_C(70), + UINT8_C(70), UINT8_C(70), UINT8_C(70), UINT8_C(70) } } }, + { { UINT8_C(29), UINT8_C(99), UINT8_C(185), UINT8_C(126) }, + { UINT8_C(199), UINT8_C(190), UINT8_C(74), UINT8_C(66)}, + { { UINT8_C(29), UINT8_C(29), UINT8_C(29), UINT8_C(29), + UINT8_C(29), UINT8_C(29), UINT8_C(29), UINT8_C(29) }, + { UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), + UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99) }, + { UINT8_C(185), UINT8_C(185), UINT8_C(185), UINT8_C(185), + UINT8_C(185), UINT8_C(185), UINT8_C(185), UINT8_C(185) }, + { UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), + UINT8_C(126), UINT8_C(126), 
UINT8_C(126), UINT8_C(126) } } }, + { { UINT8_C(78), UINT8_C(22), UINT8_C(96), UINT8_C(181) }, + { UINT8_C(7), UINT8_C(36), UINT8_C(58), UINT8_C(159)}, + { { UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), + UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78) }, + { UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22), + UINT8_C(22), UINT8_C(22), UINT8_C(22), UINT8_C(22) }, + { UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96), + UINT8_C(96), UINT8_C(96), UINT8_C(96), UINT8_C(96) }, + { UINT8_C(181), UINT8_C(181), UINT8_C(181), UINT8_C(181), + UINT8_C(181), UINT8_C(181), UINT8_C(181), UINT8_C(181) } } }, + { { UINT8_C(15), UINT8_C(77), UINT8_C(33), UINT8_C(133) }, + { UINT8_C(85), UINT8_C(81), UINT8_C(52), UINT8_C(199)}, + { { UINT8_C(15), UINT8_C(15), UINT8_C(15), UINT8_C(15), + UINT8_C(15), UINT8_C(15), UINT8_C(15), UINT8_C(15) }, + { UINT8_C(77), UINT8_C(77), UINT8_C(77), UINT8_C(77), + UINT8_C(77), UINT8_C(77), UINT8_C(77), UINT8_C(77) }, + { UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33), + UINT8_C(33), UINT8_C(33), UINT8_C(33), UINT8_C(33) }, + { UINT8_C(133), UINT8_C(133), UINT8_C(133), UINT8_C(133), + UINT8_C(133), UINT8_C(133), UINT8_C(133), UINT8_C(133) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8x4_t r = simde_vld4_dup_u8(test_vec[i].a); + simde_uint8x8x4_t expected = { + {simde_vld1_u8(test_vec[i].r[0]), simde_vld1_u8(test_vec[i].r[1]), + simde_vld1_u8(test_vec[i].r[2]), simde_vld1_u8(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u8x8(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u8x8(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4_dup_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t unused[4]; + uint16_t r[4][4]; + } test_vec[] = { + { { UINT16_C(15647), 
UINT16_C(8273), UINT16_C(18121), UINT16_C(3909) }, + { UINT16_C(3886), UINT16_C(18927), UINT16_C(6497), UINT16_C(15716)}, + { { UINT16_C(15647), UINT16_C(15647), UINT16_C(15647), UINT16_C(15647) }, + { UINT16_C(8273), UINT16_C(8273), UINT16_C(8273), UINT16_C(8273) }, + { UINT16_C(18121), UINT16_C(18121), UINT16_C(18121), UINT16_C(18121) }, + { UINT16_C(3909), UINT16_C(3909), UINT16_C(3909), UINT16_C(3909) } } }, + { { UINT16_C(10363), UINT16_C(741), UINT16_C(19314), UINT16_C(207) }, + { UINT16_C(18543), UINT16_C(10812), UINT16_C(4523), UINT16_C(17338)}, + { { UINT16_C(10363), UINT16_C(10363), UINT16_C(10363), UINT16_C(10363) }, + { UINT16_C(741), UINT16_C(741), UINT16_C(741), UINT16_C(741) }, + { UINT16_C(19314), UINT16_C(19314), UINT16_C(19314), UINT16_C(19314) }, + { UINT16_C(207), UINT16_C(207), UINT16_C(207), UINT16_C(207) } } }, + { { UINT16_C(1661), UINT16_C(15340), UINT16_C(9240), UINT16_C(2126) }, + { UINT16_C(19898), UINT16_C(6041), UINT16_C(8131), UINT16_C(5048)}, + { { UINT16_C(1661), UINT16_C(1661), UINT16_C(1661), UINT16_C(1661) }, + { UINT16_C(15340), UINT16_C(15340), UINT16_C(15340), UINT16_C(15340) }, + { UINT16_C(9240), UINT16_C(9240), UINT16_C(9240), UINT16_C(9240) }, + { UINT16_C(2126), UINT16_C(2126), UINT16_C(2126), UINT16_C(2126) } } }, + { { UINT16_C(18793), UINT16_C(9875), UINT16_C(19455), UINT16_C(5867) }, + { UINT16_C(6575), UINT16_C(19585), UINT16_C(56), UINT16_C(13746)}, + { { UINT16_C(18793), UINT16_C(18793), UINT16_C(18793), UINT16_C(18793) }, + { UINT16_C(9875), UINT16_C(9875), UINT16_C(9875), UINT16_C(9875) }, + { UINT16_C(19455), UINT16_C(19455), UINT16_C(19455), UINT16_C(19455) }, + { UINT16_C(5867), UINT16_C(5867), UINT16_C(5867), UINT16_C(5867) } } }, + { { UINT16_C(7315), UINT16_C(11780), UINT16_C(12282), UINT16_C(249) }, + { UINT16_C(2882), UINT16_C(5581), UINT16_C(2639), UINT16_C(13799)}, + { { UINT16_C(7315), UINT16_C(7315), UINT16_C(7315), UINT16_C(7315) }, + { UINT16_C(11780), UINT16_C(11780), UINT16_C(11780), 
UINT16_C(11780) }, + { UINT16_C(12282), UINT16_C(12282), UINT16_C(12282), UINT16_C(12282) }, + { UINT16_C(249), UINT16_C(249), UINT16_C(249), UINT16_C(249) } } }, + { { UINT16_C(18499), UINT16_C(11815), UINT16_C(19200), UINT16_C(9265) }, + { UINT16_C(17744), UINT16_C(12481), UINT16_C(12307), UINT16_C(15151)}, + { { UINT16_C(18499), UINT16_C(18499), UINT16_C(18499), UINT16_C(18499) }, + { UINT16_C(11815), UINT16_C(11815), UINT16_C(11815), UINT16_C(11815) }, + { UINT16_C(19200), UINT16_C(19200), UINT16_C(19200), UINT16_C(19200) }, + { UINT16_C(9265), UINT16_C(9265), UINT16_C(9265), UINT16_C(9265) } } }, + { { UINT16_C(17166), UINT16_C(17671), UINT16_C(16947), UINT16_C(2009) }, + { UINT16_C(5791), UINT16_C(6900), UINT16_C(7399), UINT16_C(18218)}, + { { UINT16_C(17166), UINT16_C(17166), UINT16_C(17166), UINT16_C(17166) }, + { UINT16_C(17671), UINT16_C(17671), UINT16_C(17671), UINT16_C(17671) }, + { UINT16_C(16947), UINT16_C(16947), UINT16_C(16947), UINT16_C(16947) }, + { UINT16_C(2009), UINT16_C(2009), UINT16_C(2009), UINT16_C(2009) } } }, + { { UINT16_C(14861), UINT16_C(6270), UINT16_C(16359), UINT16_C(971) }, + { UINT16_C(5169), UINT16_C(10261), UINT16_C(5910), UINT16_C(17221)}, + { { UINT16_C(14861), UINT16_C(14861), UINT16_C(14861), UINT16_C(14861) }, + { UINT16_C(6270), UINT16_C(6270), UINT16_C(6270), UINT16_C(6270) }, + { UINT16_C(16359), UINT16_C(16359), UINT16_C(16359), UINT16_C(16359) }, + { UINT16_C(971), UINT16_C(971), UINT16_C(971), UINT16_C(971) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4x4_t r = simde_vld4_dup_u16(test_vec[i].a); + simde_uint16x4x4_t expected = { + {simde_vld1_u16(test_vec[i].r[0]), simde_vld1_u16(test_vec[i].r[1]), + simde_vld1_u16(test_vec[i].r[2]), simde_vld1_u16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u16x4(r.val[2], 
expected.val[2]); + simde_test_arm_neon_assert_equal_u16x4(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4_dup_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint32_t unused[4]; + uint32_t r[4][2]; + } test_vec[] = { + { { UINT32_C(1580541), UINT32_C(1654270), UINT32_C(1830042), UINT32_C(684375) }, + { UINT32_C(1813447), UINT32_C(569485), UINT32_C(617848), UINT32_C(816228)}, + { { UINT32_C(1580541), UINT32_C(1580541) }, + { UINT32_C(1654270), UINT32_C(1654270) }, + { UINT32_C(1830042), UINT32_C(1830042) }, + { UINT32_C(684375), UINT32_C(684375) } } }, + { { UINT32_C(383439), UINT32_C(1188403), UINT32_C(1689940), UINT32_C(1017721) }, + { UINT32_C(1908956), UINT32_C(228246), UINT32_C(863980), UINT32_C(529514)}, + { { UINT32_C(383439), UINT32_C(383439) }, + { UINT32_C(1188403), UINT32_C(1188403) }, + { UINT32_C(1689940), UINT32_C(1689940) }, + { UINT32_C(1017721), UINT32_C(1017721) } } }, + { { UINT32_C(1641180), UINT32_C(1198501), UINT32_C(101756), UINT32_C(1521669) }, + { UINT32_C(1648251), UINT32_C(989457), UINT32_C(437928), UINT32_C(437115)}, + { { UINT32_C(1641180), UINT32_C(1641180) }, + { UINT32_C(1198501), UINT32_C(1198501) }, + { UINT32_C(101756), UINT32_C(101756) }, + { UINT32_C(1521669), UINT32_C(1521669) } } }, + { { UINT32_C(579695), UINT32_C(353096), UINT32_C(358282), UINT32_C(1828464) }, + { UINT32_C(1574203), UINT32_C(1363087), UINT32_C(222357), UINT32_C(275873)}, + { { UINT32_C(579695), UINT32_C(579695) }, + { UINT32_C(353096), UINT32_C(353096) }, + { UINT32_C(358282), UINT32_C(358282) }, + { UINT32_C(1828464), UINT32_C(1828464) } } }, + { { UINT32_C(1274076), UINT32_C(1744930), UINT32_C(420354), UINT32_C(1584284) }, + { UINT32_C(648997), UINT32_C(1376925), UINT32_C(1522400), UINT32_C(16371)}, + { { UINT32_C(1274076), UINT32_C(1274076) }, + { UINT32_C(1744930), UINT32_C(1744930) }, + { UINT32_C(420354), UINT32_C(420354) }, + { UINT32_C(1584284), UINT32_C(1584284) } } }, + { { UINT32_C(273508), 
UINT32_C(1725205), UINT32_C(284994), UINT32_C(162209) }, + { UINT32_C(346868), UINT32_C(684962), UINT32_C(673011), UINT32_C(300146)}, + { { UINT32_C(273508), UINT32_C(273508) }, + { UINT32_C(1725205), UINT32_C(1725205) }, + { UINT32_C(284994), UINT32_C(284994) }, + { UINT32_C(162209), UINT32_C(162209) } } }, + { { UINT32_C(1835748), UINT32_C(1712160), UINT32_C(1698734), UINT32_C(1195761) }, + { UINT32_C(1166282), UINT32_C(653660), UINT32_C(997506), UINT32_C(1595720)}, + { { UINT32_C(1835748), UINT32_C(1835748) }, + { UINT32_C(1712160), UINT32_C(1712160) }, + { UINT32_C(1698734), UINT32_C(1698734) }, + { UINT32_C(1195761), UINT32_C(1195761) } } }, + { { UINT32_C(651276), UINT32_C(51314), UINT32_C(370628), UINT32_C(747588) }, + { UINT32_C(1493055), UINT32_C(1665291), UINT32_C(588273), UINT32_C(971166)}, + { { UINT32_C(651276), UINT32_C(651276) }, + { UINT32_C(51314), UINT32_C(51314) }, + { UINT32_C(370628), UINT32_C(370628) }, + { UINT32_C(747588), UINT32_C(747588) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2x4_t r = simde_vld4_dup_u32(test_vec[i].a); + simde_uint32x2x4_t expected = { + {simde_vld1_u32(test_vec[i].r[0]), simde_vld1_u32(test_vec[i].r[1]), + simde_vld1_u32(test_vec[i].r[2]), simde_vld1_u32(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u32x2(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u32x2(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[4]; + uint64_t unused[4]; + uint64_t r[4][1]; + } test_vec[] = { + { { UINT64_C(20635395), UINT64_C(181936279), UINT64_C(150662617), UINT64_C(10660679) }, + { UINT64_C(136081342), UINT64_C(93194088), UINT64_C(54652239), UINT64_C(62950287)}, + { { UINT64_C(20635395) }, + { UINT64_C(181936279) 
}, + { UINT64_C(150662617) }, + { UINT64_C(10660679) } } }, + { { UINT64_C(118865721), UINT64_C(67478211), UINT64_C(174461108), UINT64_C(129320325) }, + { UINT64_C(102278144), UINT64_C(102266682), UINT64_C(49889150), UINT64_C(114118451)}, + { { UINT64_C(118865721) }, + { UINT64_C(67478211) }, + { UINT64_C(174461108) }, + { UINT64_C(129320325) } } }, + { { UINT64_C(158221320), UINT64_C(137081001), UINT64_C(7782842), UINT64_C(7026400) }, + { UINT64_C(177159024), UINT64_C(35276486), UINT64_C(99035872), UINT64_C(158572458)}, + { { UINT64_C(158221320) }, + { UINT64_C(137081001) }, + { UINT64_C(7782842) }, + { UINT64_C(7026400) } } }, + { { UINT64_C(115136670), UINT64_C(35261105), UINT64_C(3380463), UINT64_C(197918324) }, + { UINT64_C(69003240), UINT64_C(32166930), UINT64_C(11534227), UINT64_C(196165286)}, + { { UINT64_C(115136670) }, + { UINT64_C(35261105) }, + { UINT64_C(3380463) }, + { UINT64_C(197918324) } } }, + { { UINT64_C(38362170), UINT64_C(7429842), UINT64_C(80558845), UINT64_C(1179078) }, + { UINT64_C(101090282), UINT64_C(177527039), UINT64_C(180313574), UINT64_C(57773740)}, + { { UINT64_C(38362170) }, + { UINT64_C(7429842) }, + { UINT64_C(80558845) }, + { UINT64_C(1179078) } } }, + { { UINT64_C(59418595), UINT64_C(76632207), UINT64_C(114062680), UINT64_C(87463453) }, + { UINT64_C(184610108), UINT64_C(46058512), UINT64_C(140438761), UINT64_C(194086293)}, + { { UINT64_C(59418595) }, + { UINT64_C(76632207) }, + { UINT64_C(114062680) }, + { UINT64_C(87463453) } } }, + { { UINT64_C(174190462), UINT64_C(48203683), UINT64_C(9296258), UINT64_C(59202338) }, + { UINT64_C(8720854), UINT64_C(20468168), UINT64_C(170092521), UINT64_C(134729317)}, + { { UINT64_C(174190462) }, + { UINT64_C(48203683) }, + { UINT64_C(9296258) }, + { UINT64_C(59202338) } } }, + { { UINT64_C(130903980), UINT64_C(23165997), UINT64_C(74136997), UINT64_C(23457473) }, + { UINT64_C(19601344), UINT64_C(85184699), UINT64_C(189848367), UINT64_C(130732569)}, + { { UINT64_C(130903980) }, + { 
UINT64_C(23165997) }, + { UINT64_C(74136997) }, + { UINT64_C(23457473) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1x4_t r = simde_vld4_dup_u64(test_vec[i].a); + simde_uint64x1x4_t expected = { + {simde_vld1_u64(test_vec[i].r[0]), simde_vld1_u64(test_vec[i].r[1]), + simde_vld1_u64(test_vec[i].r[2]), simde_vld1_u64(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u64x1(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u64x1(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u64x1(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[4]; + simde_float16_t unused[4]; + simde_float16_t r[4][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(12.74) }, + { SIMDE_FLOAT16_VALUE(-0.13), SIMDE_FLOAT16_VALUE(-35.35), SIMDE_FLOAT16_VALUE(41.40), SIMDE_FLOAT16_VALUE(18.27)}, + { { SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(31.40), + SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(31.40), SIMDE_FLOAT16_VALUE(31.40) }, + { SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(8.80), + SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(8.80), SIMDE_FLOAT16_VALUE(8.80) }, + { SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(7.89), + SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(7.89) }, + { SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(12.74), + SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(12.74), 
SIMDE_FLOAT16_VALUE(12.74) } } }, + { { SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(1.39) }, + { SIMDE_FLOAT16_VALUE(31.88), SIMDE_FLOAT16_VALUE(-49.31), SIMDE_FLOAT16_VALUE(23.96), SIMDE_FLOAT16_VALUE(-45.98)}, + { { SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(42.40), + SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(42.40), SIMDE_FLOAT16_VALUE(42.40) }, + { SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(-29.63), + SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(-29.63), SIMDE_FLOAT16_VALUE(-29.63) }, + { SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(30.06), + SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(30.06), SIMDE_FLOAT16_VALUE(30.06) }, + { SIMDE_FLOAT16_VALUE(1.39), SIMDE_FLOAT16_VALUE(1.39), SIMDE_FLOAT16_VALUE(1.39), SIMDE_FLOAT16_VALUE(1.39), + SIMDE_FLOAT16_VALUE(1.39), SIMDE_FLOAT16_VALUE(1.39), SIMDE_FLOAT16_VALUE(1.39), SIMDE_FLOAT16_VALUE(1.39) } } }, + { { SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(14.82) }, + { SIMDE_FLOAT16_VALUE(-38.07), SIMDE_FLOAT16_VALUE(-33.07), SIMDE_FLOAT16_VALUE(15.20), SIMDE_FLOAT16_VALUE(3.67)}, + { { SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-19.20), + SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-19.20), SIMDE_FLOAT16_VALUE(-19.20) }, + { SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.05), + SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.05), SIMDE_FLOAT16_VALUE(-9.05) }, + { SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(0.57), 
SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(0.57), + SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(0.57), SIMDE_FLOAT16_VALUE(0.57) }, + { SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(14.82), + SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(14.82), SIMDE_FLOAT16_VALUE(14.82) } } }, + { { SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(38.58) }, + { SIMDE_FLOAT16_VALUE(46.28), SIMDE_FLOAT16_VALUE(-11.22), SIMDE_FLOAT16_VALUE(21.51), SIMDE_FLOAT16_VALUE(-3.08)}, + { { SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(-12.11), + SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(-12.11), SIMDE_FLOAT16_VALUE(-12.11) }, + { SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(15.24), + SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(15.24), SIMDE_FLOAT16_VALUE(15.24) }, + { SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(34.23), + SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(34.23), SIMDE_FLOAT16_VALUE(34.23) }, + { SIMDE_FLOAT16_VALUE(38.58), SIMDE_FLOAT16_VALUE(38.58), SIMDE_FLOAT16_VALUE(38.58), SIMDE_FLOAT16_VALUE(38.58), + SIMDE_FLOAT16_VALUE(38.58), SIMDE_FLOAT16_VALUE(38.58), SIMDE_FLOAT16_VALUE(38.58), SIMDE_FLOAT16_VALUE(38.58) } } }, + { { SIMDE_FLOAT16_VALUE(-16.90), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(-31.32) }, + { SIMDE_FLOAT16_VALUE(-4.72), SIMDE_FLOAT16_VALUE(28.72), SIMDE_FLOAT16_VALUE(26.60), SIMDE_FLOAT16_VALUE(-42.54)}, + { { SIMDE_FLOAT16_VALUE(-16.90), SIMDE_FLOAT16_VALUE(-16.90), SIMDE_FLOAT16_VALUE(-16.90), SIMDE_FLOAT16_VALUE(-16.90), + SIMDE_FLOAT16_VALUE(-16.90), 
SIMDE_FLOAT16_VALUE(-16.90), SIMDE_FLOAT16_VALUE(-16.90), SIMDE_FLOAT16_VALUE(-16.90) }, + { SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(45.87), + SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(45.87), SIMDE_FLOAT16_VALUE(45.87) }, + { SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(4.55), + SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(4.55), SIMDE_FLOAT16_VALUE(4.55) }, + { SIMDE_FLOAT16_VALUE(-31.32), SIMDE_FLOAT16_VALUE(-31.32), SIMDE_FLOAT16_VALUE(-31.32), SIMDE_FLOAT16_VALUE(-31.32), + SIMDE_FLOAT16_VALUE(-31.32), SIMDE_FLOAT16_VALUE(-31.32), SIMDE_FLOAT16_VALUE(-31.32), SIMDE_FLOAT16_VALUE(-31.32) } } }, + { { SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(27.61) }, + { SIMDE_FLOAT16_VALUE(-46.55), SIMDE_FLOAT16_VALUE(-27.42), SIMDE_FLOAT16_VALUE(0.86), SIMDE_FLOAT16_VALUE(11.71)}, + { { SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(19.79), + SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(19.79), SIMDE_FLOAT16_VALUE(19.79) }, + { SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(20.65), + SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(20.65), SIMDE_FLOAT16_VALUE(20.65) }, + { SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(-41.73), + SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(-41.73), SIMDE_FLOAT16_VALUE(-41.73) }, + { SIMDE_FLOAT16_VALUE(27.61), SIMDE_FLOAT16_VALUE(27.61), SIMDE_FLOAT16_VALUE(27.61), SIMDE_FLOAT16_VALUE(27.61), + SIMDE_FLOAT16_VALUE(27.61), SIMDE_FLOAT16_VALUE(27.61), SIMDE_FLOAT16_VALUE(27.61), SIMDE_FLOAT16_VALUE(27.61) } } }, + { { 
SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(-26.25) }, + { SIMDE_FLOAT16_VALUE(-46.48), SIMDE_FLOAT16_VALUE(13.65), SIMDE_FLOAT16_VALUE(3.92), SIMDE_FLOAT16_VALUE(39.42)}, + { { SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(33.12), + SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(33.12), SIMDE_FLOAT16_VALUE(33.12) }, + { SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(-27.38), + SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(-27.38), SIMDE_FLOAT16_VALUE(-27.38) }, + { SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(15.63), + SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(15.63), SIMDE_FLOAT16_VALUE(15.63) }, + { SIMDE_FLOAT16_VALUE(-26.25), SIMDE_FLOAT16_VALUE(-26.25), SIMDE_FLOAT16_VALUE(-26.25), SIMDE_FLOAT16_VALUE(-26.25), + SIMDE_FLOAT16_VALUE(-26.25), SIMDE_FLOAT16_VALUE(-26.25), SIMDE_FLOAT16_VALUE(-26.25), SIMDE_FLOAT16_VALUE(-26.25) } } }, + { { SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(34.76), SIMDE_FLOAT16_VALUE(30.47) }, + { SIMDE_FLOAT16_VALUE(44.27), SIMDE_FLOAT16_VALUE(45.27), SIMDE_FLOAT16_VALUE(47.81), SIMDE_FLOAT16_VALUE(10.39)}, + { { SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-9.54), + SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-9.54), SIMDE_FLOAT16_VALUE(-9.54) }, + { SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(-43.91), + SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(-43.91), SIMDE_FLOAT16_VALUE(-43.91) }, + { SIMDE_FLOAT16_VALUE(34.76), SIMDE_FLOAT16_VALUE(34.76), SIMDE_FLOAT16_VALUE(34.76), 
SIMDE_FLOAT16_VALUE(34.76), + SIMDE_FLOAT16_VALUE(34.76), SIMDE_FLOAT16_VALUE(34.76), SIMDE_FLOAT16_VALUE(34.76), SIMDE_FLOAT16_VALUE(34.76) }, + { SIMDE_FLOAT16_VALUE(30.47), SIMDE_FLOAT16_VALUE(30.47), SIMDE_FLOAT16_VALUE(30.47), SIMDE_FLOAT16_VALUE(30.47), + SIMDE_FLOAT16_VALUE(30.47), SIMDE_FLOAT16_VALUE(30.47), SIMDE_FLOAT16_VALUE(30.47), SIMDE_FLOAT16_VALUE(30.47) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x4_t r = simde_vld4q_dup_f16(test_vec[i].a); + simde_float16x8x4_t expected = { + {simde_vld1q_f16(test_vec[i].r[0]), simde_vld1q_f16(test_vec[i].r[1]), + simde_vld1q_f16(test_vec[i].r[2]), simde_vld1q_f16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld4q_dup_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[4]; + float unused[4]; + float r[4][4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-3861.60), SIMDE_FLOAT32_C(1139.29), SIMDE_FLOAT32_C(814.20), SIMDE_FLOAT32_C(3375.56) }, + { SIMDE_FLOAT32_C(-150.56), SIMDE_FLOAT32_C(4160.55), SIMDE_FLOAT32_C(2937.14), SIMDE_FLOAT32_C(3001.41)}, + { { SIMDE_FLOAT32_C(-3861.60), SIMDE_FLOAT32_C(-3861.60), SIMDE_FLOAT32_C(-3861.60), SIMDE_FLOAT32_C(-3861.60) }, + { SIMDE_FLOAT32_C(1139.29), SIMDE_FLOAT32_C(1139.29), SIMDE_FLOAT32_C(1139.29), SIMDE_FLOAT32_C(1139.29) }, + { SIMDE_FLOAT32_C(814.20), SIMDE_FLOAT32_C(814.20), SIMDE_FLOAT32_C(814.20), SIMDE_FLOAT32_C(814.20) }, + { SIMDE_FLOAT32_C(3375.56), SIMDE_FLOAT32_C(3375.56), SIMDE_FLOAT32_C(3375.56), SIMDE_FLOAT32_C(3375.56) } } }, + { { SIMDE_FLOAT32_C(805.49), SIMDE_FLOAT32_C(-4528.24), SIMDE_FLOAT32_C(3834.36), SIMDE_FLOAT32_C(57.43) }, + { 
SIMDE_FLOAT32_C(-3865.32), SIMDE_FLOAT32_C(-2417.22), SIMDE_FLOAT32_C(1872.65), SIMDE_FLOAT32_C(4313.25)}, + { { SIMDE_FLOAT32_C(805.49), SIMDE_FLOAT32_C(805.49), SIMDE_FLOAT32_C(805.49), SIMDE_FLOAT32_C(805.49) }, + { SIMDE_FLOAT32_C(-4528.24), SIMDE_FLOAT32_C(-4528.24), SIMDE_FLOAT32_C(-4528.24), SIMDE_FLOAT32_C(-4528.24) }, + { SIMDE_FLOAT32_C(3834.36), SIMDE_FLOAT32_C(3834.36), SIMDE_FLOAT32_C(3834.36), SIMDE_FLOAT32_C(3834.36) }, + { SIMDE_FLOAT32_C(57.43), SIMDE_FLOAT32_C(57.43), SIMDE_FLOAT32_C(57.43), SIMDE_FLOAT32_C(57.43) } } }, + { { SIMDE_FLOAT32_C(-2714.45), SIMDE_FLOAT32_C(496.22), SIMDE_FLOAT32_C(1544.09), SIMDE_FLOAT32_C(4179.89) }, + { SIMDE_FLOAT32_C(3908.86), SIMDE_FLOAT32_C(-4458.82), SIMDE_FLOAT32_C(-4099.24), SIMDE_FLOAT32_C(3128.99)}, + { { SIMDE_FLOAT32_C(-2714.45), SIMDE_FLOAT32_C(-2714.45), SIMDE_FLOAT32_C(-2714.45), SIMDE_FLOAT32_C(-2714.45) }, + { SIMDE_FLOAT32_C(496.22), SIMDE_FLOAT32_C(496.22), SIMDE_FLOAT32_C(496.22), SIMDE_FLOAT32_C(496.22) }, + { SIMDE_FLOAT32_C(1544.09), SIMDE_FLOAT32_C(1544.09), SIMDE_FLOAT32_C(1544.09), SIMDE_FLOAT32_C(1544.09) }, + { SIMDE_FLOAT32_C(4179.89), SIMDE_FLOAT32_C(4179.89), SIMDE_FLOAT32_C(4179.89), SIMDE_FLOAT32_C(4179.89) } } }, + { { SIMDE_FLOAT32_C(2851.07), SIMDE_FLOAT32_C(-1716.43), SIMDE_FLOAT32_C(-4967.83), SIMDE_FLOAT32_C(-2269.08) }, + { SIMDE_FLOAT32_C(3455.94), SIMDE_FLOAT32_C(719.08), SIMDE_FLOAT32_C(-854.44), SIMDE_FLOAT32_C(2913.17)}, + { { SIMDE_FLOAT32_C(2851.07), SIMDE_FLOAT32_C(2851.07), SIMDE_FLOAT32_C(2851.07), SIMDE_FLOAT32_C(2851.07) }, + { SIMDE_FLOAT32_C(-1716.43), SIMDE_FLOAT32_C(-1716.43), SIMDE_FLOAT32_C(-1716.43), SIMDE_FLOAT32_C(-1716.43) }, + { SIMDE_FLOAT32_C(-4967.83), SIMDE_FLOAT32_C(-4967.83), SIMDE_FLOAT32_C(-4967.83), SIMDE_FLOAT32_C(-4967.83) }, + { SIMDE_FLOAT32_C(-2269.08), SIMDE_FLOAT32_C(-2269.08), SIMDE_FLOAT32_C(-2269.08), SIMDE_FLOAT32_C(-2269.08) } } }, + { { SIMDE_FLOAT32_C(-3383.87), SIMDE_FLOAT32_C(-2809.13), SIMDE_FLOAT32_C(-2728.33), 
SIMDE_FLOAT32_C(-2481.28) }, + { SIMDE_FLOAT32_C(2154.97), SIMDE_FLOAT32_C(2696.19), SIMDE_FLOAT32_C(-1729.45), SIMDE_FLOAT32_C(-4102.35)}, + { { SIMDE_FLOAT32_C(-3383.87), SIMDE_FLOAT32_C(-3383.87), SIMDE_FLOAT32_C(-3383.87), SIMDE_FLOAT32_C(-3383.87) }, + { SIMDE_FLOAT32_C(-2809.13), SIMDE_FLOAT32_C(-2809.13), SIMDE_FLOAT32_C(-2809.13), SIMDE_FLOAT32_C(-2809.13) }, + { SIMDE_FLOAT32_C(-2728.33), SIMDE_FLOAT32_C(-2728.33), SIMDE_FLOAT32_C(-2728.33), SIMDE_FLOAT32_C(-2728.33) }, + { SIMDE_FLOAT32_C(-2481.28), SIMDE_FLOAT32_C(-2481.28), SIMDE_FLOAT32_C(-2481.28), SIMDE_FLOAT32_C(-2481.28) } } }, + { { SIMDE_FLOAT32_C(1690.98), SIMDE_FLOAT32_C(-2798.40), SIMDE_FLOAT32_C(2781.03), SIMDE_FLOAT32_C(-4301.72) }, + { SIMDE_FLOAT32_C(3345.82), SIMDE_FLOAT32_C(-2334.78), SIMDE_FLOAT32_C(-2152.95), SIMDE_FLOAT32_C(4461.78)}, + { { SIMDE_FLOAT32_C(1690.98), SIMDE_FLOAT32_C(1690.98), SIMDE_FLOAT32_C(1690.98), SIMDE_FLOAT32_C(1690.98) }, + { SIMDE_FLOAT32_C(-2798.40), SIMDE_FLOAT32_C(-2798.40), SIMDE_FLOAT32_C(-2798.40), SIMDE_FLOAT32_C(-2798.40) }, + { SIMDE_FLOAT32_C(2781.03), SIMDE_FLOAT32_C(2781.03), SIMDE_FLOAT32_C(2781.03), SIMDE_FLOAT32_C(2781.03) }, + { SIMDE_FLOAT32_C(-4301.72), SIMDE_FLOAT32_C(-4301.72), SIMDE_FLOAT32_C(-4301.72), SIMDE_FLOAT32_C(-4301.72) } } }, + { { SIMDE_FLOAT32_C(3849.23), SIMDE_FLOAT32_C(-2373.31), SIMDE_FLOAT32_C(-2655.60), SIMDE_FLOAT32_C(-4271.80) }, + { SIMDE_FLOAT32_C(-1886.25), SIMDE_FLOAT32_C(-3798.07), SIMDE_FLOAT32_C(98.28), SIMDE_FLOAT32_C(-4138.60)}, + { { SIMDE_FLOAT32_C(3849.23), SIMDE_FLOAT32_C(3849.23), SIMDE_FLOAT32_C(3849.23), SIMDE_FLOAT32_C(3849.23) }, + { SIMDE_FLOAT32_C(-2373.31), SIMDE_FLOAT32_C(-2373.31), SIMDE_FLOAT32_C(-2373.31), SIMDE_FLOAT32_C(-2373.31) }, + { SIMDE_FLOAT32_C(-2655.60), SIMDE_FLOAT32_C(-2655.60), SIMDE_FLOAT32_C(-2655.60), SIMDE_FLOAT32_C(-2655.60) }, + { SIMDE_FLOAT32_C(-4271.80), SIMDE_FLOAT32_C(-4271.80), SIMDE_FLOAT32_C(-4271.80), SIMDE_FLOAT32_C(-4271.80) } } }, + { { SIMDE_FLOAT32_C(3103.14), 
SIMDE_FLOAT32_C(188.95), SIMDE_FLOAT32_C(730.17), SIMDE_FLOAT32_C(4115.93) }, + { SIMDE_FLOAT32_C(270.70), SIMDE_FLOAT32_C(260.43), SIMDE_FLOAT32_C(-1629.46), SIMDE_FLOAT32_C(1068.15)}, + { { SIMDE_FLOAT32_C(3103.14), SIMDE_FLOAT32_C(3103.14), SIMDE_FLOAT32_C(3103.14), SIMDE_FLOAT32_C(3103.14) }, + { SIMDE_FLOAT32_C(188.95), SIMDE_FLOAT32_C(188.95), SIMDE_FLOAT32_C(188.95), SIMDE_FLOAT32_C(188.95) }, + { SIMDE_FLOAT32_C(730.17), SIMDE_FLOAT32_C(730.17), SIMDE_FLOAT32_C(730.17), SIMDE_FLOAT32_C(730.17) }, + { SIMDE_FLOAT32_C(4115.93), SIMDE_FLOAT32_C(4115.93), SIMDE_FLOAT32_C(4115.93), SIMDE_FLOAT32_C(4115.93) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4x4_t r = simde_vld4q_dup_f32(test_vec[i].a); + simde_float32x4x4_t expected = { + {simde_vld1q_f32(test_vec[i].r[0]), simde_vld1q_f32(test_vec[i].r[1]), + simde_vld1q_f32(test_vec[i].r[2]), simde_vld1q_f32(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + + +static int +test_simde_vld4q_dup_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64 a[4]; + simde_float64 unused[4]; + simde_float64 r[4][2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(301939.25), SIMDE_FLOAT64_C(31180.53), SIMDE_FLOAT64_C(328514.37), SIMDE_FLOAT64_C(-403632.52) }, + { SIMDE_FLOAT64_C(228223.39), SIMDE_FLOAT64_C(65131.77), SIMDE_FLOAT64_C(-234702.56), SIMDE_FLOAT64_C(164177.40)}, + { { SIMDE_FLOAT64_C(301939.25), SIMDE_FLOAT64_C(301939.25) }, + { SIMDE_FLOAT64_C(31180.53), SIMDE_FLOAT64_C(31180.53) }, + { SIMDE_FLOAT64_C(328514.37), SIMDE_FLOAT64_C(328514.37) }, + { SIMDE_FLOAT64_C(-403632.52), SIMDE_FLOAT64_C(-403632.52) } } }, + { { SIMDE_FLOAT64_C(56986.01), 
SIMDE_FLOAT64_C(283724.36), SIMDE_FLOAT64_C(155364.01), SIMDE_FLOAT64_C(-415889.92) }, + { SIMDE_FLOAT64_C(-414762.25), SIMDE_FLOAT64_C(-93809.32), SIMDE_FLOAT64_C(28559.25), SIMDE_FLOAT64_C(-291618.76)}, + { { SIMDE_FLOAT64_C(56986.01), SIMDE_FLOAT64_C(56986.01) }, + { SIMDE_FLOAT64_C(283724.36), SIMDE_FLOAT64_C(283724.36) }, + { SIMDE_FLOAT64_C(155364.01), SIMDE_FLOAT64_C(155364.01) }, + { SIMDE_FLOAT64_C(-415889.92), SIMDE_FLOAT64_C(-415889.92) } } }, + { { SIMDE_FLOAT64_C(-284581.10), SIMDE_FLOAT64_C(50140.85), SIMDE_FLOAT64_C(136310.26), SIMDE_FLOAT64_C(448266.40) }, + { SIMDE_FLOAT64_C(266934.08), SIMDE_FLOAT64_C(207025.51), SIMDE_FLOAT64_C(-32860.95), SIMDE_FLOAT64_C(-59684.94)}, + { { SIMDE_FLOAT64_C(-284581.10), SIMDE_FLOAT64_C(-284581.10) }, + { SIMDE_FLOAT64_C(50140.85), SIMDE_FLOAT64_C(50140.85) }, + { SIMDE_FLOAT64_C(136310.26), SIMDE_FLOAT64_C(136310.26) }, + { SIMDE_FLOAT64_C(448266.40), SIMDE_FLOAT64_C(448266.40) } } }, + { { SIMDE_FLOAT64_C(-405893.56), SIMDE_FLOAT64_C(259839.71), SIMDE_FLOAT64_C(422741.48), SIMDE_FLOAT64_C(-225178.67) }, + { SIMDE_FLOAT64_C(-127457.76), SIMDE_FLOAT64_C(-23597.33), SIMDE_FLOAT64_C(372570.03), SIMDE_FLOAT64_C(-368106.17)}, + { { SIMDE_FLOAT64_C(-405893.56), SIMDE_FLOAT64_C(-405893.56) }, + { SIMDE_FLOAT64_C(259839.71), SIMDE_FLOAT64_C(259839.71) }, + { SIMDE_FLOAT64_C(422741.48), SIMDE_FLOAT64_C(422741.48) }, + { SIMDE_FLOAT64_C(-225178.67), SIMDE_FLOAT64_C(-225178.67) } } }, + { { SIMDE_FLOAT64_C(29202.40), SIMDE_FLOAT64_C(-476185.70), SIMDE_FLOAT64_C(350122.87), SIMDE_FLOAT64_C(440133.57) }, + { SIMDE_FLOAT64_C(-333398.19), SIMDE_FLOAT64_C(-420427.97), SIMDE_FLOAT64_C(124909.95), SIMDE_FLOAT64_C(364853.02)}, + { { SIMDE_FLOAT64_C(29202.40), SIMDE_FLOAT64_C(29202.40) }, + { SIMDE_FLOAT64_C(-476185.70), SIMDE_FLOAT64_C(-476185.70) }, + { SIMDE_FLOAT64_C(350122.87), SIMDE_FLOAT64_C(350122.87) }, + { SIMDE_FLOAT64_C(440133.57), SIMDE_FLOAT64_C(440133.57) } } }, + { { SIMDE_FLOAT64_C(281268.20), 
SIMDE_FLOAT64_C(-21602.25), SIMDE_FLOAT64_C(64507.47), SIMDE_FLOAT64_C(365563.55) }, + { SIMDE_FLOAT64_C(270302.58), SIMDE_FLOAT64_C(-156251.88), SIMDE_FLOAT64_C(164546.89), SIMDE_FLOAT64_C(-173037.65)}, + { { SIMDE_FLOAT64_C(281268.20), SIMDE_FLOAT64_C(281268.20) }, + { SIMDE_FLOAT64_C(-21602.25), SIMDE_FLOAT64_C(-21602.25) }, + { SIMDE_FLOAT64_C(64507.47), SIMDE_FLOAT64_C(64507.47) }, + { SIMDE_FLOAT64_C(365563.55), SIMDE_FLOAT64_C(365563.55) } } }, + { { SIMDE_FLOAT64_C(236563.73), SIMDE_FLOAT64_C(298156.49), SIMDE_FLOAT64_C(-463451.58), SIMDE_FLOAT64_C(246608.44) }, + { SIMDE_FLOAT64_C(-217371.80), SIMDE_FLOAT64_C(113655.63), SIMDE_FLOAT64_C(402255.30), SIMDE_FLOAT64_C(449192.11)}, + { { SIMDE_FLOAT64_C(236563.73), SIMDE_FLOAT64_C(236563.73) }, + { SIMDE_FLOAT64_C(298156.49), SIMDE_FLOAT64_C(298156.49) }, + { SIMDE_FLOAT64_C(-463451.58), SIMDE_FLOAT64_C(-463451.58) }, + { SIMDE_FLOAT64_C(246608.44), SIMDE_FLOAT64_C(246608.44) } } }, + { { SIMDE_FLOAT64_C(-389102.28), SIMDE_FLOAT64_C(228365.87), SIMDE_FLOAT64_C(81756.79), SIMDE_FLOAT64_C(-398903.35) }, + { SIMDE_FLOAT64_C(492074.05), SIMDE_FLOAT64_C(471641.95), SIMDE_FLOAT64_C(-224322.91), SIMDE_FLOAT64_C(498142.32)}, + { { SIMDE_FLOAT64_C(-389102.28), SIMDE_FLOAT64_C(-389102.28) }, + { SIMDE_FLOAT64_C(228365.87), SIMDE_FLOAT64_C(228365.87) }, + { SIMDE_FLOAT64_C(81756.79), SIMDE_FLOAT64_C(81756.79) }, + { SIMDE_FLOAT64_C(-398903.35), SIMDE_FLOAT64_C(-398903.35) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2x4_t r = simde_vld4q_dup_f64(test_vec[i].a); + simde_float64x2x4_t expected = { + {simde_vld1q_f64(test_vec[i].r[0]), simde_vld1q_f64(test_vec[i].r[1]), + simde_vld1q_f64(test_vec[i].r[2]), simde_vld1q_f64(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[2], expected.val[2], 
INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r.val[3], expected.val[3], INT_MAX); + } + + return 0; +} + +static int +test_simde_vld4q_dup_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[4]; + int8_t unused[4]; + int8_t r[4][16]; + } test_vec[] = { + { { INT8_C(17), -INT8_C(53), INT8_C(76), INT8_C(86) }, + { INT8_C(72), -INT8_C(98), INT8_C(32), -INT8_C(67)}, + { { INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), + INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17), INT8_C(17) }, + { -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), + -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53), -INT8_C(53) }, + { INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), + INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76), INT8_C(76) }, + { INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), + INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86), INT8_C(86) } } }, + { { -INT8_C(42), INT8_C(69), INT8_C(0), -INT8_C(20) }, + { INT8_C(2), INT8_C(22), INT8_C(50), INT8_C(81)}, + { { -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), + -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42), -INT8_C(42) }, + { INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), + INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69) }, + { INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), + INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0), INT8_C(0) }, + { -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), 
-INT8_C(20), -INT8_C(20), + -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20), -INT8_C(20) } } }, + { { INT8_C(81), -INT8_C(85), INT8_C(69), INT8_C(85) }, + { -INT8_C(25), INT8_C(84), -INT8_C(85), -INT8_C(82)}, + { { INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), + INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81), INT8_C(81) }, + { -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), + -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85), -INT8_C(85) }, + { INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), + INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69), INT8_C(69) }, + { INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), + INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85), INT8_C(85) } } }, + { { INT8_C(96), -INT8_C(82), -INT8_C(62), -INT8_C(37) }, + { INT8_C(42), -INT8_C(5), INT8_C(6), INT8_C(64)}, + { { INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), + INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96), INT8_C(96) }, + { -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), + -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82), -INT8_C(82) }, + { -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), + -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62), -INT8_C(62) }, + { -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), + -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), -INT8_C(37), 
-INT8_C(37), -INT8_C(37), -INT8_C(37) } } }, + { { INT8_C(97), INT8_C(10), INT8_C(98), -INT8_C(6) }, + { INT8_C(48), INT8_C(21), INT8_C(54), INT8_C(91)}, + { { INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), + INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97), INT8_C(97) }, + { INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), + INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10), INT8_C(10) }, + { INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), + INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98), INT8_C(98) }, + { -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), + -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6), -INT8_C(6) } } }, + { { -INT8_C(59), -INT8_C(78), -INT8_C(32), -INT8_C(77) }, + { -INT8_C(6), INT8_C(20), -INT8_C(70), INT8_C(62)}, + { { -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), + -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59), -INT8_C(59) }, + { -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), + -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78), -INT8_C(78) }, + { -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), + -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32), -INT8_C(32) }, + { -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), + -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77), -INT8_C(77) } } }, + { { INT8_C(73), -INT8_C(34), INT8_C(91), INT8_C(29) 
}, + { INT8_C(72), -INT8_C(56), INT8_C(92), INT8_C(43)}, + { { INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), + INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73), INT8_C(73) }, + { -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), + -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34), -INT8_C(34) }, + { INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), + INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91), INT8_C(91) }, + { INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), + INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29), INT8_C(29) } } }, + { { INT8_C(57), -INT8_C(74), INT8_C(59), INT8_C(19) }, + { INT8_C(85), INT8_C(21), INT8_C(92), INT8_C(11)}, + { { INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), + INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57), INT8_C(57) }, + { -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), + -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74), -INT8_C(74) }, + { INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), + INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59), INT8_C(59) }, + { INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), + INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19), INT8_C(19) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16x4_t r = simde_vld4q_dup_s8(test_vec[i].a); + simde_int8x16x4_t expected = { + 
{simde_vld1q_s8(test_vec[i].r[0]), simde_vld1q_s8(test_vec[i].r[1]), + simde_vld1q_s8(test_vec[i].r[2]), simde_vld1q_s8(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i8x16(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i8x16(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t unused[4]; + int16_t r[4][8]; + } test_vec[] = { + { { -INT16_C(1310), -INT16_C(2011), -INT16_C(5610), INT16_C(5601) }, + { -INT16_C(8476), -INT16_C(8186), -INT16_C(3033), INT16_C(4031)}, + { { -INT16_C(1310), -INT16_C(1310), -INT16_C(1310), -INT16_C(1310), + -INT16_C(1310), -INT16_C(1310), -INT16_C(1310), -INT16_C(1310) }, + { -INT16_C(2011), -INT16_C(2011), -INT16_C(2011), -INT16_C(2011), + -INT16_C(2011), -INT16_C(2011), -INT16_C(2011), -INT16_C(2011) }, + { -INT16_C(5610), -INT16_C(5610), -INT16_C(5610), -INT16_C(5610), + -INT16_C(5610), -INT16_C(5610), -INT16_C(5610), -INT16_C(5610) }, + { INT16_C(5601), INT16_C(5601), INT16_C(5601), INT16_C(5601), + INT16_C(5601), INT16_C(5601), INT16_C(5601), INT16_C(5601) } } }, + { { INT16_C(8148), INT16_C(2754), INT16_C(8342), -INT16_C(1139) }, + { -INT16_C(5489), INT16_C(6809), -INT16_C(2547), -INT16_C(3550)}, + { { INT16_C(8148), INT16_C(8148), INT16_C(8148), INT16_C(8148), + INT16_C(8148), INT16_C(8148), INT16_C(8148), INT16_C(8148) }, + { INT16_C(2754), INT16_C(2754), INT16_C(2754), INT16_C(2754), + INT16_C(2754), INT16_C(2754), INT16_C(2754), INT16_C(2754) }, + { INT16_C(8342), INT16_C(8342), INT16_C(8342), INT16_C(8342), + INT16_C(8342), INT16_C(8342), INT16_C(8342), INT16_C(8342) }, + { -INT16_C(1139), -INT16_C(1139), -INT16_C(1139), -INT16_C(1139), + -INT16_C(1139), -INT16_C(1139), -INT16_C(1139), -INT16_C(1139) } } }, + { { -INT16_C(3232), INT16_C(1913), 
INT16_C(1602), INT16_C(8369) }, + { -INT16_C(6047), -INT16_C(2404), -INT16_C(7005), -INT16_C(7266)}, + { { -INT16_C(3232), -INT16_C(3232), -INT16_C(3232), -INT16_C(3232), + -INT16_C(3232), -INT16_C(3232), -INT16_C(3232), -INT16_C(3232) }, + { INT16_C(1913), INT16_C(1913), INT16_C(1913), INT16_C(1913), + INT16_C(1913), INT16_C(1913), INT16_C(1913), INT16_C(1913) }, + { INT16_C(1602), INT16_C(1602), INT16_C(1602), INT16_C(1602), + INT16_C(1602), INT16_C(1602), INT16_C(1602), INT16_C(1602) }, + { INT16_C(8369), INT16_C(8369), INT16_C(8369), INT16_C(8369), + INT16_C(8369), INT16_C(8369), INT16_C(8369), INT16_C(8369) } } }, + { { INT16_C(6692), INT16_C(9283), -INT16_C(9852), -INT16_C(2207) }, + { INT16_C(8934), INT16_C(8583), -INT16_C(3148), -INT16_C(2677)}, + { { INT16_C(6692), INT16_C(6692), INT16_C(6692), INT16_C(6692), + INT16_C(6692), INT16_C(6692), INT16_C(6692), INT16_C(6692) }, + { INT16_C(9283), INT16_C(9283), INT16_C(9283), INT16_C(9283), + INT16_C(9283), INT16_C(9283), INT16_C(9283), INT16_C(9283) }, + { -INT16_C(9852), -INT16_C(9852), -INT16_C(9852), -INT16_C(9852), + -INT16_C(9852), -INT16_C(9852), -INT16_C(9852), -INT16_C(9852) }, + { -INT16_C(2207), -INT16_C(2207), -INT16_C(2207), -INT16_C(2207), + -INT16_C(2207), -INT16_C(2207), -INT16_C(2207), -INT16_C(2207) } } }, + { { INT16_C(5481), -INT16_C(8270), INT16_C(5603), INT16_C(8130) }, + { -INT16_C(594), -INT16_C(9174), INT16_C(2942), INT16_C(9649)}, + { { INT16_C(5481), INT16_C(5481), INT16_C(5481), INT16_C(5481), + INT16_C(5481), INT16_C(5481), INT16_C(5481), INT16_C(5481) }, + { -INT16_C(8270), -INT16_C(8270), -INT16_C(8270), -INT16_C(8270), + -INT16_C(8270), -INT16_C(8270), -INT16_C(8270), -INT16_C(8270) }, + { INT16_C(5603), INT16_C(5603), INT16_C(5603), INT16_C(5603), + INT16_C(5603), INT16_C(5603), INT16_C(5603), INT16_C(5603) }, + { INT16_C(8130), INT16_C(8130), INT16_C(8130), INT16_C(8130), + INT16_C(8130), INT16_C(8130), INT16_C(8130), INT16_C(8130) } } }, + { { INT16_C(2622), INT16_C(658), 
INT16_C(85), INT16_C(5386) }, + { INT16_C(4188), -INT16_C(8037), -INT16_C(2112), INT16_C(1973)}, + { { INT16_C(2622), INT16_C(2622), INT16_C(2622), INT16_C(2622), + INT16_C(2622), INT16_C(2622), INT16_C(2622), INT16_C(2622) }, + { INT16_C(658), INT16_C(658), INT16_C(658), INT16_C(658), + INT16_C(658), INT16_C(658), INT16_C(658), INT16_C(658) }, + { INT16_C(85), INT16_C(85), INT16_C(85), INT16_C(85), + INT16_C(85), INT16_C(85), INT16_C(85), INT16_C(85) }, + { INT16_C(5386), INT16_C(5386), INT16_C(5386), INT16_C(5386), + INT16_C(5386), INT16_C(5386), INT16_C(5386), INT16_C(5386) } } }, + { { -INT16_C(2202), -INT16_C(6550), -INT16_C(1489), INT16_C(9813) }, + { -INT16_C(9394), -INT16_C(9999), -INT16_C(116), -INT16_C(1583)}, + { { -INT16_C(2202), -INT16_C(2202), -INT16_C(2202), -INT16_C(2202), + -INT16_C(2202), -INT16_C(2202), -INT16_C(2202), -INT16_C(2202) }, + { -INT16_C(6550), -INT16_C(6550), -INT16_C(6550), -INT16_C(6550), + -INT16_C(6550), -INT16_C(6550), -INT16_C(6550), -INT16_C(6550) }, + { -INT16_C(1489), -INT16_C(1489), -INT16_C(1489), -INT16_C(1489), + -INT16_C(1489), -INT16_C(1489), -INT16_C(1489), -INT16_C(1489) }, + { INT16_C(9813), INT16_C(9813), INT16_C(9813), INT16_C(9813), + INT16_C(9813), INT16_C(9813), INT16_C(9813), INT16_C(9813) } } }, + { { INT16_C(1789), INT16_C(3725), -INT16_C(5614), -INT16_C(177) }, + { INT16_C(7432), INT16_C(3078), INT16_C(1340), INT16_C(5203)}, + { { INT16_C(1789), INT16_C(1789), INT16_C(1789), INT16_C(1789), + INT16_C(1789), INT16_C(1789), INT16_C(1789), INT16_C(1789) }, + { INT16_C(3725), INT16_C(3725), INT16_C(3725), INT16_C(3725), + INT16_C(3725), INT16_C(3725), INT16_C(3725), INT16_C(3725) }, + { -INT16_C(5614), -INT16_C(5614), -INT16_C(5614), -INT16_C(5614), + -INT16_C(5614), -INT16_C(5614), -INT16_C(5614), -INT16_C(5614) }, + { -INT16_C(177), -INT16_C(177), -INT16_C(177), -INT16_C(177), + -INT16_C(177), -INT16_C(177), -INT16_C(177), -INT16_C(177) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_int16x8x4_t r = simde_vld4q_dup_s16(test_vec[i].a); + simde_int16x8x4_t expected = { + {simde_vld1q_s16(test_vec[i].r[0]), simde_vld1q_s16(test_vec[i].r[1]), + simde_vld1q_s16(test_vec[i].r[2]), simde_vld1q_s16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i16x8(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i16x8(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t unused[4]; + int32_t r[4][4]; + } test_vec[] = { + { { INT32_C(96036), INT32_C(276706), -INT32_C(576383), INT32_C(787308) }, + { -INT32_C(917425), -INT32_C(795228), -INT32_C(300239), -INT32_C(520199)}, + { { INT32_C(96036), INT32_C(96036), INT32_C(96036), INT32_C(96036) }, + { INT32_C(276706), INT32_C(276706), INT32_C(276706), INT32_C(276706) }, + { -INT32_C(576383), -INT32_C(576383), -INT32_C(576383), -INT32_C(576383) }, + { INT32_C(787308), INT32_C(787308), INT32_C(787308), INT32_C(787308) } } }, + { { INT32_C(197220), -INT32_C(589269), -INT32_C(684313), -INT32_C(673963) }, + { -INT32_C(626857), -INT32_C(238551), -INT32_C(117726), -INT32_C(575676)}, + { { INT32_C(197220), INT32_C(197220), INT32_C(197220), INT32_C(197220) }, + { -INT32_C(589269), -INT32_C(589269), -INT32_C(589269), -INT32_C(589269) }, + { -INT32_C(684313), -INT32_C(684313), -INT32_C(684313), -INT32_C(684313) }, + { -INT32_C(673963), -INT32_C(673963), -INT32_C(673963), -INT32_C(673963) } } }, + { { INT32_C(925544), INT32_C(818928), INT32_C(78068), -INT32_C(500546) }, + { INT32_C(721350), INT32_C(786469), -INT32_C(61037), -INT32_C(414620)}, + { { INT32_C(925544), INT32_C(925544), INT32_C(925544), INT32_C(925544) }, + { INT32_C(818928), INT32_C(818928), INT32_C(818928), INT32_C(818928) }, + { INT32_C(78068), INT32_C(78068), 
INT32_C(78068), INT32_C(78068) }, + { -INT32_C(500546), -INT32_C(500546), -INT32_C(500546), -INT32_C(500546) } } }, + { { -INT32_C(852696), -INT32_C(319929), -INT32_C(18615), -INT32_C(820282) }, + { -INT32_C(669753), -INT32_C(107857), -INT32_C(923798), -INT32_C(894016)}, + { { -INT32_C(852696), -INT32_C(852696), -INT32_C(852696), -INT32_C(852696) }, + { -INT32_C(319929), -INT32_C(319929), -INT32_C(319929), -INT32_C(319929) }, + { -INT32_C(18615), -INT32_C(18615), -INT32_C(18615), -INT32_C(18615) }, + { -INT32_C(820282), -INT32_C(820282), -INT32_C(820282), -INT32_C(820282) } } }, + { { -INT32_C(28238), -INT32_C(717470), INT32_C(330287), -INT32_C(918323) }, + { -INT32_C(572545), INT32_C(541080), -INT32_C(285996), -INT32_C(731588)}, + { { -INT32_C(28238), -INT32_C(28238), -INT32_C(28238), -INT32_C(28238) }, + { -INT32_C(717470), -INT32_C(717470), -INT32_C(717470), -INT32_C(717470) }, + { INT32_C(330287), INT32_C(330287), INT32_C(330287), INT32_C(330287) }, + { -INT32_C(918323), -INT32_C(918323), -INT32_C(918323), -INT32_C(918323) } } }, + { { INT32_C(304604), INT32_C(749469), -INT32_C(843860), INT32_C(236450) }, + { -INT32_C(751345), INT32_C(386071), INT32_C(252136), INT32_C(758287)}, + { { INT32_C(304604), INT32_C(304604), INT32_C(304604), INT32_C(304604) }, + { INT32_C(749469), INT32_C(749469), INT32_C(749469), INT32_C(749469) }, + { -INT32_C(843860), -INT32_C(843860), -INT32_C(843860), -INT32_C(843860) }, + { INT32_C(236450), INT32_C(236450), INT32_C(236450), INT32_C(236450) } } }, + { { INT32_C(641773), INT32_C(790736), -INT32_C(2616), INT32_C(175478) }, + { INT32_C(363459), -INT32_C(479556), -INT32_C(939682), -INT32_C(215811)}, + { { INT32_C(641773), INT32_C(641773), INT32_C(641773), INT32_C(641773) }, + { INT32_C(790736), INT32_C(790736), INT32_C(790736), INT32_C(790736) }, + { -INT32_C(2616), -INT32_C(2616), -INT32_C(2616), -INT32_C(2616) }, + { INT32_C(175478), INT32_C(175478), INT32_C(175478), INT32_C(175478) } } }, + { { INT32_C(909638), INT32_C(191353), 
INT32_C(158729), INT32_C(1951) }, + { INT32_C(269992), -INT32_C(166368), INT32_C(437900), -INT32_C(328553)}, + { { INT32_C(909638), INT32_C(909638), INT32_C(909638), INT32_C(909638) }, + { INT32_C(191353), INT32_C(191353), INT32_C(191353), INT32_C(191353) }, + { INT32_C(158729), INT32_C(158729), INT32_C(158729), INT32_C(158729) }, + { INT32_C(1951), INT32_C(1951), INT32_C(1951), INT32_C(1951) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4x4_t r = simde_vld4q_dup_s32(test_vec[i].a); + simde_int32x4x4_t expected = { + {simde_vld1q_s32(test_vec[i].r[0]), simde_vld1q_s32(test_vec[i].r[1]), + simde_vld1q_s32(test_vec[i].r[2]), simde_vld1q_s32(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i32x4(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i32x4(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[4]; + int64_t unused[4]; + int64_t r[4][2]; + } test_vec[] = { + { { -INT64_C(33824478), -INT64_C(5187232), INT64_C(39515006), -INT64_C(80914792) }, + { INT64_C(93726945), -INT64_C(25808930), INT64_C(63257430), INT64_C(30498672)}, + { { -INT64_C(33824478), -INT64_C(33824478) }, + { -INT64_C(5187232), -INT64_C(5187232) }, + { INT64_C(39515006), INT64_C(39515006) }, + { -INT64_C(80914792), -INT64_C(80914792) } } }, + { { INT64_C(79009808), INT64_C(23479431), INT64_C(7280288), -INT64_C(4073394) }, + { INT64_C(91088968), -INT64_C(57443644), -INT64_C(67527816), -INT64_C(27085279)}, + { { INT64_C(79009808), INT64_C(79009808) }, + { INT64_C(23479431), INT64_C(23479431) }, + { INT64_C(7280288), INT64_C(7280288) }, + { -INT64_C(4073394), -INT64_C(4073394) } } }, + { { INT64_C(47560098), -INT64_C(42568172), -INT64_C(84940253), -INT64_C(13762228) }, + { 
INT64_C(51818022), INT64_C(64955838), INT64_C(72970264), INT64_C(15597298)}, + { { INT64_C(47560098), INT64_C(47560098) }, + { -INT64_C(42568172), -INT64_C(42568172) }, + { -INT64_C(84940253), -INT64_C(84940253) }, + { -INT64_C(13762228), -INT64_C(13762228) } } }, + { { -INT64_C(94228216), INT64_C(86016311), INT64_C(44715658), INT64_C(98567682) }, + { -INT64_C(48698288), INT64_C(40924262), -INT64_C(71124922), INT64_C(97823707)}, + { { -INT64_C(94228216), -INT64_C(94228216) }, + { INT64_C(86016311), INT64_C(86016311) }, + { INT64_C(44715658), INT64_C(44715658) }, + { INT64_C(98567682), INT64_C(98567682) } } }, + { { -INT64_C(23294763), -INT64_C(18329751), INT64_C(202119), -INT64_C(93023835) }, + { -INT64_C(72529621), -INT64_C(2617545), -INT64_C(51382967), -INT64_C(19480654)}, + { { -INT64_C(23294763), -INT64_C(23294763) }, + { -INT64_C(18329751), -INT64_C(18329751) }, + { INT64_C(202119), INT64_C(202119) }, + { -INT64_C(93023835), -INT64_C(93023835) } } }, + { { -INT64_C(74212734), -INT64_C(67029147), -INT64_C(98945421), -INT64_C(45976698) }, + { -INT64_C(24390280), INT64_C(90621403), INT64_C(71367745), INT64_C(89478676)}, + { { -INT64_C(74212734), -INT64_C(74212734) }, + { -INT64_C(67029147), -INT64_C(67029147) }, + { -INT64_C(98945421), -INT64_C(98945421) }, + { -INT64_C(45976698), -INT64_C(45976698) } } }, + { { INT64_C(19216507), INT64_C(87031207), -INT64_C(92732196), INT64_C(17901903) }, + { -INT64_C(63712817), -INT64_C(52596459), -INT64_C(56846456), INT64_C(83839716)}, + { { INT64_C(19216507), INT64_C(19216507) }, + { INT64_C(87031207), INT64_C(87031207) }, + { -INT64_C(92732196), -INT64_C(92732196) }, + { INT64_C(17901903), INT64_C(17901903) } } }, + { { INT64_C(22223414), INT64_C(16045202), -INT64_C(10451385), -INT64_C(43892407) }, + { INT64_C(9088806), INT64_C(5526600), -INT64_C(39966909), -INT64_C(16888026)}, + { { INT64_C(22223414), INT64_C(22223414) }, + { INT64_C(16045202), INT64_C(16045202) }, + { -INT64_C(10451385), -INT64_C(10451385) }, + { 
-INT64_C(43892407), -INT64_C(43892407) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2x4_t r = simde_vld4q_dup_s64(test_vec[i].a); + simde_int64x2x4_t expected = { + {simde_vld1q_s64(test_vec[i].r[0]), simde_vld1q_s64(test_vec[i].r[1]), + simde_vld1q_s64(test_vec[i].r[2]), simde_vld1q_s64(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_i64x2(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_i64x2(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[4]; + uint8_t unused[4]; + uint8_t r[4][16]; + } test_vec[] = { + { { UINT8_C(116), UINT8_C(71), UINT8_C(49), UINT8_C(111) }, + { UINT8_C(32), UINT8_C(42), UINT8_C(155), UINT8_C(143)}, + { { UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), + UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116), UINT8_C(116) }, + { UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), + UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71), UINT8_C(71) }, + { UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), + UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49), UINT8_C(49) }, + { UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), + UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111) } } }, + { { UINT8_C(78), UINT8_C(123), UINT8_C(149), UINT8_C(6) }, + { UINT8_C(99), UINT8_C(125), UINT8_C(28), UINT8_C(85)}, + { { UINT8_C(78), 
UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), + UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78), UINT8_C(78) }, + { UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), + UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123) }, + { UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), + UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149), UINT8_C(149) }, + { UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), + UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6), UINT8_C(6) } } }, + { { UINT8_C(65), UINT8_C(57), UINT8_C(192), UINT8_C(126) }, + { UINT8_C(178), UINT8_C(53), UINT8_C(96), UINT8_C(39)}, + { { UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), + UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65), UINT8_C(65) }, + { UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), + UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57), UINT8_C(57) }, + { UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), + UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192) }, + { UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), + UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126) } } }, + { { UINT8_C(107), UINT8_C(172), UINT8_C(112), UINT8_C(99) }, + { UINT8_C(140), UINT8_C(193), UINT8_C(42), 
UINT8_C(31)}, + { { UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), + UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107), UINT8_C(107) }, + { UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), + UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172), UINT8_C(172) }, + { UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), + UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112), UINT8_C(112) }, + { UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), + UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99), UINT8_C(99) } } }, + { { UINT8_C(184), UINT8_C(97), UINT8_C(109), UINT8_C(89) }, + { UINT8_C(30), UINT8_C(38), UINT8_C(31), UINT8_C(133)}, + { { UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), + UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184) }, + { UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), + UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97), UINT8_C(97) }, + { UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), + UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109), UINT8_C(109) }, + { UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), + UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89), UINT8_C(89) } } }, + { { UINT8_C(80), UINT8_C(53), UINT8_C(126), 
UINT8_C(83) }, + { UINT8_C(48), UINT8_C(9), UINT8_C(28), UINT8_C(3)}, + { { UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), + UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80), UINT8_C(80) }, + { UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), + UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53), UINT8_C(53) }, + { UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), + UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126), UINT8_C(126) }, + { UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), + UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83), UINT8_C(83) } } }, + { { UINT8_C(12), UINT8_C(5), UINT8_C(69), UINT8_C(0) }, + { UINT8_C(124), UINT8_C(136), UINT8_C(44), UINT8_C(0)}, + { { UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), + UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12), UINT8_C(12) }, + { UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), + UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5), UINT8_C(5) }, + { UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), + UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69), UINT8_C(69) }, + { UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), + UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0) } } }, + { { UINT8_C(2), UINT8_C(163), UINT8_C(34), UINT8_C(161) }, + { UINT8_C(146), 
UINT8_C(64), UINT8_C(123), UINT8_C(49)}, + { { UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), + UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2), UINT8_C(2) }, + { UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), + UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163), UINT8_C(163) }, + { UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), + UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34), UINT8_C(34) }, + { UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), + UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161), UINT8_C(161) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16x4_t r = simde_vld4q_dup_u8(test_vec[i].a); + simde_uint8x16x4_t expected = { + {simde_vld1q_u8(test_vec[i].r[0]), simde_vld1q_u8(test_vec[i].r[1]), + simde_vld1q_u8(test_vec[i].r[2]), simde_vld1q_u8(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u8x16(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u8x16(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t unused[4]; + uint16_t r[4][8]; + } test_vec[] = { + { { UINT16_C(10133), UINT16_C(6843), UINT16_C(4919), UINT16_C(4910) }, + { UINT16_C(5127), UINT16_C(12463), UINT16_C(6726), UINT16_C(17829)}, + { { UINT16_C(10133), UINT16_C(10133), UINT16_C(10133), UINT16_C(10133), + UINT16_C(10133), UINT16_C(10133), UINT16_C(10133), 
UINT16_C(10133) }, + { UINT16_C(6843), UINT16_C(6843), UINT16_C(6843), UINT16_C(6843), + UINT16_C(6843), UINT16_C(6843), UINT16_C(6843), UINT16_C(6843) }, + { UINT16_C(4919), UINT16_C(4919), UINT16_C(4919), UINT16_C(4919), + UINT16_C(4919), UINT16_C(4919), UINT16_C(4919), UINT16_C(4919) }, + { UINT16_C(4910), UINT16_C(4910), UINT16_C(4910), UINT16_C(4910), + UINT16_C(4910), UINT16_C(4910), UINT16_C(4910), UINT16_C(4910) } } }, + { { UINT16_C(2738), UINT16_C(7856), UINT16_C(14494), UINT16_C(7102) }, + { UINT16_C(244), UINT16_C(9302), UINT16_C(7532), UINT16_C(5861)}, + { { UINT16_C(2738), UINT16_C(2738), UINT16_C(2738), UINT16_C(2738), + UINT16_C(2738), UINT16_C(2738), UINT16_C(2738), UINT16_C(2738) }, + { UINT16_C(7856), UINT16_C(7856), UINT16_C(7856), UINT16_C(7856), + UINT16_C(7856), UINT16_C(7856), UINT16_C(7856), UINT16_C(7856) }, + { UINT16_C(14494), UINT16_C(14494), UINT16_C(14494), UINT16_C(14494), + UINT16_C(14494), UINT16_C(14494), UINT16_C(14494), UINT16_C(14494) }, + { UINT16_C(7102), UINT16_C(7102), UINT16_C(7102), UINT16_C(7102), + UINT16_C(7102), UINT16_C(7102), UINT16_C(7102), UINT16_C(7102) } } }, + { { UINT16_C(5864), UINT16_C(933), UINT16_C(15453), UINT16_C(2108) }, + { UINT16_C(11794), UINT16_C(15211), UINT16_C(19290), UINT16_C(8938)}, + { { UINT16_C(5864), UINT16_C(5864), UINT16_C(5864), UINT16_C(5864), + UINT16_C(5864), UINT16_C(5864), UINT16_C(5864), UINT16_C(5864) }, + { UINT16_C(933), UINT16_C(933), UINT16_C(933), UINT16_C(933), + UINT16_C(933), UINT16_C(933), UINT16_C(933), UINT16_C(933) }, + { UINT16_C(15453), UINT16_C(15453), UINT16_C(15453), UINT16_C(15453), + UINT16_C(15453), UINT16_C(15453), UINT16_C(15453), UINT16_C(15453) }, + { UINT16_C(2108), UINT16_C(2108), UINT16_C(2108), UINT16_C(2108), + UINT16_C(2108), UINT16_C(2108), UINT16_C(2108), UINT16_C(2108) } } }, + { { UINT16_C(7054), UINT16_C(10852), UINT16_C(1425), UINT16_C(11316) }, + { UINT16_C(4905), UINT16_C(8620), UINT16_C(9572), UINT16_C(6783)}, + { { UINT16_C(7054), 
UINT16_C(7054), UINT16_C(7054), UINT16_C(7054), + UINT16_C(7054), UINT16_C(7054), UINT16_C(7054), UINT16_C(7054) }, + { UINT16_C(10852), UINT16_C(10852), UINT16_C(10852), UINT16_C(10852), + UINT16_C(10852), UINT16_C(10852), UINT16_C(10852), UINT16_C(10852) }, + { UINT16_C(1425), UINT16_C(1425), UINT16_C(1425), UINT16_C(1425), + UINT16_C(1425), UINT16_C(1425), UINT16_C(1425), UINT16_C(1425) }, + { UINT16_C(11316), UINT16_C(11316), UINT16_C(11316), UINT16_C(11316), + UINT16_C(11316), UINT16_C(11316), UINT16_C(11316), UINT16_C(11316) } } }, + { { UINT16_C(13105), UINT16_C(2420), UINT16_C(11971), UINT16_C(17278) }, + { UINT16_C(2311), UINT16_C(9393), UINT16_C(19286), UINT16_C(16567)}, + { { UINT16_C(13105), UINT16_C(13105), UINT16_C(13105), UINT16_C(13105), + UINT16_C(13105), UINT16_C(13105), UINT16_C(13105), UINT16_C(13105) }, + { UINT16_C(2420), UINT16_C(2420), UINT16_C(2420), UINT16_C(2420), + UINT16_C(2420), UINT16_C(2420), UINT16_C(2420), UINT16_C(2420) }, + { UINT16_C(11971), UINT16_C(11971), UINT16_C(11971), UINT16_C(11971), + UINT16_C(11971), UINT16_C(11971), UINT16_C(11971), UINT16_C(11971) }, + { UINT16_C(17278), UINT16_C(17278), UINT16_C(17278), UINT16_C(17278), + UINT16_C(17278), UINT16_C(17278), UINT16_C(17278), UINT16_C(17278) } } }, + { { UINT16_C(14853), UINT16_C(8577), UINT16_C(6845), UINT16_C(7330) }, + { UINT16_C(3034), UINT16_C(14753), UINT16_C(15783), UINT16_C(13850)}, + { { UINT16_C(14853), UINT16_C(14853), UINT16_C(14853), UINT16_C(14853), + UINT16_C(14853), UINT16_C(14853), UINT16_C(14853), UINT16_C(14853) }, + { UINT16_C(8577), UINT16_C(8577), UINT16_C(8577), UINT16_C(8577), + UINT16_C(8577), UINT16_C(8577), UINT16_C(8577), UINT16_C(8577) }, + { UINT16_C(6845), UINT16_C(6845), UINT16_C(6845), UINT16_C(6845), + UINT16_C(6845), UINT16_C(6845), UINT16_C(6845), UINT16_C(6845) }, + { UINT16_C(7330), UINT16_C(7330), UINT16_C(7330), UINT16_C(7330), + UINT16_C(7330), UINT16_C(7330), UINT16_C(7330), UINT16_C(7330) } } }, + { { UINT16_C(15035), 
UINT16_C(3809), UINT16_C(2240), UINT16_C(11888) }, + { UINT16_C(2225), UINT16_C(9548), UINT16_C(9735), UINT16_C(9069)}, + { { UINT16_C(15035), UINT16_C(15035), UINT16_C(15035), UINT16_C(15035), + UINT16_C(15035), UINT16_C(15035), UINT16_C(15035), UINT16_C(15035) }, + { UINT16_C(3809), UINT16_C(3809), UINT16_C(3809), UINT16_C(3809), + UINT16_C(3809), UINT16_C(3809), UINT16_C(3809), UINT16_C(3809) }, + { UINT16_C(2240), UINT16_C(2240), UINT16_C(2240), UINT16_C(2240), + UINT16_C(2240), UINT16_C(2240), UINT16_C(2240), UINT16_C(2240) }, + { UINT16_C(11888), UINT16_C(11888), UINT16_C(11888), UINT16_C(11888), + UINT16_C(11888), UINT16_C(11888), UINT16_C(11888), UINT16_C(11888) } } }, + { { UINT16_C(19246), UINT16_C(11341), UINT16_C(6716), UINT16_C(8706) }, + { UINT16_C(10028), UINT16_C(16213), UINT16_C(13201), UINT16_C(14776)}, + { { UINT16_C(19246), UINT16_C(19246), UINT16_C(19246), UINT16_C(19246), + UINT16_C(19246), UINT16_C(19246), UINT16_C(19246), UINT16_C(19246) }, + { UINT16_C(11341), UINT16_C(11341), UINT16_C(11341), UINT16_C(11341), + UINT16_C(11341), UINT16_C(11341), UINT16_C(11341), UINT16_C(11341) }, + { UINT16_C(6716), UINT16_C(6716), UINT16_C(6716), UINT16_C(6716), + UINT16_C(6716), UINT16_C(6716), UINT16_C(6716), UINT16_C(6716) }, + { UINT16_C(8706), UINT16_C(8706), UINT16_C(8706), UINT16_C(8706), + UINT16_C(8706), UINT16_C(8706), UINT16_C(8706), UINT16_C(8706) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8x4_t r = simde_vld4q_dup_u16(test_vec[i].a); + simde_uint16x8x4_t expected = { + {simde_vld1q_u16(test_vec[i].r[0]), simde_vld1q_u16(test_vec[i].r[1]), + simde_vld1q_u16(test_vec[i].r[2]), simde_vld1q_u16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u16x8(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u16x8(r.val[3], 
expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint32_t unused[4]; + uint32_t r[4][4]; + } test_vec[] = { + { { UINT32_C(1257486), UINT32_C(1370541), UINT32_C(1442704), UINT32_C(6473) }, + { UINT32_C(473754), UINT32_C(1121548), UINT32_C(1019225), UINT32_C(1532022)}, + { { UINT32_C(1257486), UINT32_C(1257486), UINT32_C(1257486), UINT32_C(1257486) }, + { UINT32_C(1370541), UINT32_C(1370541), UINT32_C(1370541), UINT32_C(1370541) }, + { UINT32_C(1442704), UINT32_C(1442704), UINT32_C(1442704), UINT32_C(1442704) }, + { UINT32_C(6473), UINT32_C(6473), UINT32_C(6473), UINT32_C(6473) } } }, + { { UINT32_C(1777965), UINT32_C(1389944), UINT32_C(409346), UINT32_C(538095) }, + { UINT32_C(1323377), UINT32_C(928426), UINT32_C(983700), UINT32_C(1385753)}, + { { UINT32_C(1777965), UINT32_C(1777965), UINT32_C(1777965), UINT32_C(1777965) }, + { UINT32_C(1389944), UINT32_C(1389944), UINT32_C(1389944), UINT32_C(1389944) }, + { UINT32_C(409346), UINT32_C(409346), UINT32_C(409346), UINT32_C(409346) }, + { UINT32_C(538095), UINT32_C(538095), UINT32_C(538095), UINT32_C(538095) } } }, + { { UINT32_C(596045), UINT32_C(764597), UINT32_C(1063129), UINT32_C(1422260) }, + { UINT32_C(1733416), UINT32_C(1030230), UINT32_C(1044242), UINT32_C(324599)}, + { { UINT32_C(596045), UINT32_C(596045), UINT32_C(596045), UINT32_C(596045) }, + { UINT32_C(764597), UINT32_C(764597), UINT32_C(764597), UINT32_C(764597) }, + { UINT32_C(1063129), UINT32_C(1063129), UINT32_C(1063129), UINT32_C(1063129) }, + { UINT32_C(1422260), UINT32_C(1422260), UINT32_C(1422260), UINT32_C(1422260) } } }, + { { UINT32_C(1423480), UINT32_C(1138531), UINT32_C(90322), UINT32_C(481969) }, + { UINT32_C(1376772), UINT32_C(1695937), UINT32_C(624624), UINT32_C(1995648)}, + { { UINT32_C(1423480), UINT32_C(1423480), UINT32_C(1423480), UINT32_C(1423480) }, + { UINT32_C(1138531), UINT32_C(1138531), UINT32_C(1138531), UINT32_C(1138531) }, + { 
UINT32_C(90322), UINT32_C(90322), UINT32_C(90322), UINT32_C(90322) }, + { UINT32_C(481969), UINT32_C(481969), UINT32_C(481969), UINT32_C(481969) } } }, + { { UINT32_C(717226), UINT32_C(1436576), UINT32_C(931521), UINT32_C(1409184) }, + { UINT32_C(645928), UINT32_C(1920577), UINT32_C(329937), UINT32_C(446258)}, + { { UINT32_C(717226), UINT32_C(717226), UINT32_C(717226), UINT32_C(717226) }, + { UINT32_C(1436576), UINT32_C(1436576), UINT32_C(1436576), UINT32_C(1436576) }, + { UINT32_C(931521), UINT32_C(931521), UINT32_C(931521), UINT32_C(931521) }, + { UINT32_C(1409184), UINT32_C(1409184), UINT32_C(1409184), UINT32_C(1409184) } } }, + { { UINT32_C(233155), UINT32_C(139215), UINT32_C(269390), UINT32_C(626804) }, + { UINT32_C(1203036), UINT32_C(383145), UINT32_C(793331), UINT32_C(1556239)}, + { { UINT32_C(233155), UINT32_C(233155), UINT32_C(233155), UINT32_C(233155) }, + { UINT32_C(139215), UINT32_C(139215), UINT32_C(139215), UINT32_C(139215) }, + { UINT32_C(269390), UINT32_C(269390), UINT32_C(269390), UINT32_C(269390) }, + { UINT32_C(626804), UINT32_C(626804), UINT32_C(626804), UINT32_C(626804) } } }, + { { UINT32_C(1222326), UINT32_C(1812050), UINT32_C(944922), UINT32_C(616220) }, + { UINT32_C(460703), UINT32_C(465318), UINT32_C(530328), UINT32_C(484235)}, + { { UINT32_C(1222326), UINT32_C(1222326), UINT32_C(1222326), UINT32_C(1222326) }, + { UINT32_C(1812050), UINT32_C(1812050), UINT32_C(1812050), UINT32_C(1812050) }, + { UINT32_C(944922), UINT32_C(944922), UINT32_C(944922), UINT32_C(944922) }, + { UINT32_C(616220), UINT32_C(616220), UINT32_C(616220), UINT32_C(616220) } } }, + { { UINT32_C(945985), UINT32_C(95899), UINT32_C(1011438), UINT32_C(1588319) }, + { UINT32_C(437902), UINT32_C(1316605), UINT32_C(646765), UINT32_C(915755)}, + { { UINT32_C(945985), UINT32_C(945985), UINT32_C(945985), UINT32_C(945985) }, + { UINT32_C(95899), UINT32_C(95899), UINT32_C(95899), UINT32_C(95899) }, + { UINT32_C(1011438), UINT32_C(1011438), UINT32_C(1011438), UINT32_C(1011438) }, + { 
UINT32_C(1588319), UINT32_C(1588319), UINT32_C(1588319), UINT32_C(1588319) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4x4_t r = simde_vld4q_dup_u32(test_vec[i].a); + simde_uint32x4x4_t expected = { + {simde_vld1q_u32(test_vec[i].r[0]), simde_vld1q_u32(test_vec[i].r[1]), + simde_vld1q_u32(test_vec[i].r[2]), simde_vld1q_u32(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u32x4(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u32x4(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u32x4(r.val[3], expected.val[3]); + } + + return 0; +} + +static int +test_simde_vld4q_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[4]; + uint64_t unused[4]; + uint64_t r[4][2]; + } test_vec[] = { + { { UINT64_C(19415023), UINT64_C(134986722), UINT64_C(140423989), UINT64_C(194366613) }, + { UINT64_C(29748623), UINT64_C(63792851), UINT64_C(184798224), UINT64_C(170880801)}, + { { UINT64_C(19415023), UINT64_C(19415023) }, + { UINT64_C(134986722), UINT64_C(134986722) }, + { UINT64_C(140423989), UINT64_C(140423989) }, + { UINT64_C(194366613), UINT64_C(194366613) } } }, + { { UINT64_C(72477781), UINT64_C(119998170), UINT64_C(19598295), UINT64_C(4615743) }, + { UINT64_C(153105690), UINT64_C(4212704), UINT64_C(31753299), UINT64_C(100870623)}, + { { UINT64_C(72477781), UINT64_C(72477781) }, + { UINT64_C(119998170), UINT64_C(119998170) }, + { UINT64_C(19598295), UINT64_C(19598295) }, + { UINT64_C(4615743), UINT64_C(4615743) } } }, + { { UINT64_C(83267470), UINT64_C(184004374), UINT64_C(3865768), UINT64_C(20318927) }, + { UINT64_C(101590415), UINT64_C(123726578), UINT64_C(162433497), UINT64_C(54552934)}, + { { UINT64_C(83267470), UINT64_C(83267470) }, + { UINT64_C(184004374), UINT64_C(184004374) }, + { UINT64_C(3865768), UINT64_C(3865768) }, + { UINT64_C(20318927), UINT64_C(20318927) } } }, + { { 
UINT64_C(39898183), UINT64_C(62216994), UINT64_C(153003397), UINT64_C(181219988) }, + { UINT64_C(150010328), UINT64_C(105260849), UINT64_C(187591910), UINT64_C(59479289)}, + { { UINT64_C(39898183), UINT64_C(39898183) }, + { UINT64_C(62216994), UINT64_C(62216994) }, + { UINT64_C(153003397), UINT64_C(153003397) }, + { UINT64_C(181219988), UINT64_C(181219988) } } }, + { { UINT64_C(94456081), UINT64_C(158574380), UINT64_C(117246539), UINT64_C(195475124) }, + { UINT64_C(45127472), UINT64_C(38196881), UINT64_C(167589198), UINT64_C(56546110)}, + { { UINT64_C(94456081), UINT64_C(94456081) }, + { UINT64_C(158574380), UINT64_C(158574380) }, + { UINT64_C(117246539), UINT64_C(117246539) }, + { UINT64_C(195475124), UINT64_C(195475124) } } }, + { { UINT64_C(84845160), UINT64_C(183689028), UINT64_C(107358519), UINT64_C(141662487) }, + { UINT64_C(124767939), UINT64_C(127494288), UINT64_C(109530805), UINT64_C(73133350)}, + { { UINT64_C(84845160), UINT64_C(84845160) }, + { UINT64_C(183689028), UINT64_C(183689028) }, + { UINT64_C(107358519), UINT64_C(107358519) }, + { UINT64_C(141662487), UINT64_C(141662487) } } }, + { { UINT64_C(126339519), UINT64_C(795258), UINT64_C(46341725), UINT64_C(117764884) }, + { UINT64_C(114711556), UINT64_C(24833869), UINT64_C(26165703), UINT64_C(104429137)}, + { { UINT64_C(126339519), UINT64_C(126339519) }, + { UINT64_C(795258), UINT64_C(795258) }, + { UINT64_C(46341725), UINT64_C(46341725) }, + { UINT64_C(117764884), UINT64_C(117764884) } } }, + { { UINT64_C(146405593), UINT64_C(135429224), UINT64_C(127955147), UINT64_C(56324641) }, + { UINT64_C(39205962), UINT64_C(153262080), UINT64_C(163534203), UINT64_C(21735994)}, + { { UINT64_C(146405593), UINT64_C(146405593) }, + { UINT64_C(135429224), UINT64_C(135429224) }, + { UINT64_C(127955147), UINT64_C(127955147) }, + { UINT64_C(56324641), UINT64_C(56324641) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2x4_t r = simde_vld4q_dup_u64(test_vec[i].a); + 
simde_uint64x2x4_t expected = { + {simde_vld1q_u64(test_vec[i].r[0]), simde_vld1q_u64(test_vec[i].r[1]), + simde_vld1q_u64(test_vec[i].r[2]), simde_vld1q_u64(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); + simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); + simde_test_arm_neon_assert_equal_u64x2(r.val[2], expected.val[2]); + simde_test_arm_neon_assert_equal_u64x2(r.val[3], expected.val[3]); + } + + return 0; +} + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_TEST_FUNC_LIST_BEGIN +#if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_dup_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_dup_u64) +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld4_lane.c b/test/arm/neon/ld4_lane.c index 1b8b1dbad..d4d85e264 100644 --- a/test/arm/neon/ld4_lane.c +++ b/test/arm/neon/ld4_lane.c @@ -700,6 +700,71 @@ test_simde_vld4_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vld4_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[4][4]; + simde_float16_t 
buf[4]; + simde_float16_t r[4][4]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE(-23.48), SIMDE_FLOAT16_VALUE(-23.67), SIMDE_FLOAT16_VALUE(44.71), SIMDE_FLOAT16_VALUE(-48.89) }, + { SIMDE_FLOAT16_VALUE(46.07), SIMDE_FLOAT16_VALUE(-4.26), SIMDE_FLOAT16_VALUE(-33.58), SIMDE_FLOAT16_VALUE(-23.36) }, + { SIMDE_FLOAT16_VALUE(45.79), SIMDE_FLOAT16_VALUE(8.17), SIMDE_FLOAT16_VALUE(41.11), SIMDE_FLOAT16_VALUE(24.71) }, + { SIMDE_FLOAT16_VALUE(-6.19), SIMDE_FLOAT16_VALUE(-19.02), SIMDE_FLOAT16_VALUE(4.14), SIMDE_FLOAT16_VALUE(9.43) } }, + { SIMDE_FLOAT16_VALUE(-1.12), SIMDE_FLOAT16_VALUE(-28.65), SIMDE_FLOAT16_VALUE(-39.28), SIMDE_FLOAT16_VALUE(-49.01)}, + { { SIMDE_FLOAT16_VALUE(-1.12), SIMDE_FLOAT16_VALUE(-23.67), SIMDE_FLOAT16_VALUE(44.71), SIMDE_FLOAT16_VALUE(-48.89) }, + { SIMDE_FLOAT16_VALUE(-28.65), SIMDE_FLOAT16_VALUE(-4.26), SIMDE_FLOAT16_VALUE(-33.58), SIMDE_FLOAT16_VALUE(-23.36) }, + { SIMDE_FLOAT16_VALUE(-39.28), SIMDE_FLOAT16_VALUE(8.17), SIMDE_FLOAT16_VALUE(41.11), SIMDE_FLOAT16_VALUE(24.71) }, + { SIMDE_FLOAT16_VALUE(-49.01), SIMDE_FLOAT16_VALUE(-19.02), SIMDE_FLOAT16_VALUE(4.14), SIMDE_FLOAT16_VALUE(9.43) } } }, + { { { SIMDE_FLOAT16_VALUE(-14.34), SIMDE_FLOAT16_VALUE(23.49), SIMDE_FLOAT16_VALUE(-10.62), SIMDE_FLOAT16_VALUE(39.64) }, + { SIMDE_FLOAT16_VALUE(-31.94), SIMDE_FLOAT16_VALUE(-41.05), SIMDE_FLOAT16_VALUE(20.30), SIMDE_FLOAT16_VALUE(-28.08) }, + { SIMDE_FLOAT16_VALUE(-46.77), SIMDE_FLOAT16_VALUE(-6.97), SIMDE_FLOAT16_VALUE(10.38), SIMDE_FLOAT16_VALUE(12.19) }, + { SIMDE_FLOAT16_VALUE(41.44), SIMDE_FLOAT16_VALUE(-47.57), SIMDE_FLOAT16_VALUE(-43.76), SIMDE_FLOAT16_VALUE(26.14) } }, + { SIMDE_FLOAT16_VALUE(-20.20), SIMDE_FLOAT16_VALUE(43.64), SIMDE_FLOAT16_VALUE(11.06), SIMDE_FLOAT16_VALUE(-41.55)}, + { { SIMDE_FLOAT16_VALUE(-14.34), SIMDE_FLOAT16_VALUE(-20.20), SIMDE_FLOAT16_VALUE(-10.62), SIMDE_FLOAT16_VALUE(39.64) }, + { SIMDE_FLOAT16_VALUE(-31.94), SIMDE_FLOAT16_VALUE(43.64), SIMDE_FLOAT16_VALUE(20.30), SIMDE_FLOAT16_VALUE(-28.08) }, + { 
SIMDE_FLOAT16_VALUE(-46.77), SIMDE_FLOAT16_VALUE(11.06), SIMDE_FLOAT16_VALUE(10.38), SIMDE_FLOAT16_VALUE(12.19) }, + { SIMDE_FLOAT16_VALUE(41.44), SIMDE_FLOAT16_VALUE(-41.55), SIMDE_FLOAT16_VALUE(-43.76), SIMDE_FLOAT16_VALUE(26.14) } } }, + { { { SIMDE_FLOAT16_VALUE(-49.94), SIMDE_FLOAT16_VALUE(-3.45), SIMDE_FLOAT16_VALUE(-18.16), SIMDE_FLOAT16_VALUE(-35.32) }, + { SIMDE_FLOAT16_VALUE(-39.02), SIMDE_FLOAT16_VALUE(20.10), SIMDE_FLOAT16_VALUE(9.41), SIMDE_FLOAT16_VALUE(-12.84) }, + { SIMDE_FLOAT16_VALUE(-20.30), SIMDE_FLOAT16_VALUE(18.38), SIMDE_FLOAT16_VALUE(36.37), SIMDE_FLOAT16_VALUE(-15.62) }, + { SIMDE_FLOAT16_VALUE(27.10), SIMDE_FLOAT16_VALUE(-38.20), SIMDE_FLOAT16_VALUE(-15.40), SIMDE_FLOAT16_VALUE(-20.21) } }, + { SIMDE_FLOAT16_VALUE(-9.89), SIMDE_FLOAT16_VALUE(-2.32), SIMDE_FLOAT16_VALUE(-33.75), SIMDE_FLOAT16_VALUE(25.64)}, + { { SIMDE_FLOAT16_VALUE(-49.94), SIMDE_FLOAT16_VALUE(-3.45), SIMDE_FLOAT16_VALUE(-9.89), SIMDE_FLOAT16_VALUE(-35.32) }, + { SIMDE_FLOAT16_VALUE(-39.02), SIMDE_FLOAT16_VALUE(20.10), SIMDE_FLOAT16_VALUE(-2.32), SIMDE_FLOAT16_VALUE(-12.84) }, + { SIMDE_FLOAT16_VALUE(-20.30), SIMDE_FLOAT16_VALUE(18.38), SIMDE_FLOAT16_VALUE(-33.75), SIMDE_FLOAT16_VALUE(-15.62) }, + { SIMDE_FLOAT16_VALUE(27.10), SIMDE_FLOAT16_VALUE(-38.20), SIMDE_FLOAT16_VALUE(25.64), SIMDE_FLOAT16_VALUE(-20.21) } } }, + { { { SIMDE_FLOAT16_VALUE(28.09), SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(28.29), SIMDE_FLOAT16_VALUE(-15.35) }, + { SIMDE_FLOAT16_VALUE(15.14), SIMDE_FLOAT16_VALUE(-42.36), SIMDE_FLOAT16_VALUE(-3.50), SIMDE_FLOAT16_VALUE(11.41) }, + { SIMDE_FLOAT16_VALUE(-2.75), SIMDE_FLOAT16_VALUE(20.25), SIMDE_FLOAT16_VALUE(29.83), SIMDE_FLOAT16_VALUE(45.27) }, + { SIMDE_FLOAT16_VALUE(22.78), SIMDE_FLOAT16_VALUE(10.89), SIMDE_FLOAT16_VALUE(42.55), SIMDE_FLOAT16_VALUE(-45.91) } }, + { SIMDE_FLOAT16_VALUE(-8.61), SIMDE_FLOAT16_VALUE(-39.93), SIMDE_FLOAT16_VALUE(-16.44), SIMDE_FLOAT16_VALUE(43.00)}, + { { SIMDE_FLOAT16_VALUE(28.09), SIMDE_FLOAT16_VALUE(-29.96), 
SIMDE_FLOAT16_VALUE(28.29), SIMDE_FLOAT16_VALUE(-8.61) }, + { SIMDE_FLOAT16_VALUE(15.14), SIMDE_FLOAT16_VALUE(-42.36), SIMDE_FLOAT16_VALUE(-3.50), SIMDE_FLOAT16_VALUE(-39.93) }, + { SIMDE_FLOAT16_VALUE(-2.75), SIMDE_FLOAT16_VALUE(20.25), SIMDE_FLOAT16_VALUE(29.83), SIMDE_FLOAT16_VALUE(-16.44) }, + { SIMDE_FLOAT16_VALUE(22.78), SIMDE_FLOAT16_VALUE(10.89), SIMDE_FLOAT16_VALUE(42.55), SIMDE_FLOAT16_VALUE(43.00) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x4_t r; + simde_float16x4x4_t src = { + {simde_vld1_f16(test_vec[i].src[0]), simde_vld1_f16(test_vec[i].src[1]), + simde_vld1_f16(test_vec[i].src[2]), simde_vld1_f16(test_vec[i].src[3])}}; + simde_float16x4x4_t expected = { + {simde_vld1_f16(test_vec[i].r[0]), simde_vld1_f16(test_vec[i].r[1]), + simde_vld1_f16(test_vec[i].r[2]), simde_vld1_f16(test_vec[i].r[3])}}; + + SIMDE_CONSTIFY_4_(simde_vld4_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], 1); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], 1); + simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], 1); + simde_test_arm_neon_assert_equal_f16x4(r.val[3], expected.val[3], 1); + } + + return 0; +} + static int test_simde_vld4_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1714,6 +1779,171 @@ test_simde_vld4q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vld4q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t src[4][8]; + simde_float16_t buf[4]; + simde_float16_t r[4][8]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE(6.33), SIMDE_FLOAT16_VALUE(-26.33), SIMDE_FLOAT16_VALUE(-42.22), SIMDE_FLOAT16_VALUE(-34.31), + SIMDE_FLOAT16_VALUE(-14.99), SIMDE_FLOAT16_VALUE(-21.27), SIMDE_FLOAT16_VALUE(2.75), SIMDE_FLOAT16_VALUE(47.13) }, + { SIMDE_FLOAT16_VALUE(-43.49), SIMDE_FLOAT16_VALUE(17.65), SIMDE_FLOAT16_VALUE(39.50), SIMDE_FLOAT16_VALUE(-42.60), + 
SIMDE_FLOAT16_VALUE(36.05), SIMDE_FLOAT16_VALUE(-8.32), SIMDE_FLOAT16_VALUE(-40.92), SIMDE_FLOAT16_VALUE(18.06) }, + { SIMDE_FLOAT16_VALUE(-42.88), SIMDE_FLOAT16_VALUE(4.14), SIMDE_FLOAT16_VALUE(45.09), SIMDE_FLOAT16_VALUE(20.35), + SIMDE_FLOAT16_VALUE(-37.99), SIMDE_FLOAT16_VALUE(6.50), SIMDE_FLOAT16_VALUE(-31.38), SIMDE_FLOAT16_VALUE(7.92) }, + { SIMDE_FLOAT16_VALUE(-39.25), SIMDE_FLOAT16_VALUE(-28.22), SIMDE_FLOAT16_VALUE(-2.20), SIMDE_FLOAT16_VALUE(-44.73), + SIMDE_FLOAT16_VALUE(-39.75), SIMDE_FLOAT16_VALUE(48.64), SIMDE_FLOAT16_VALUE(-28.85), SIMDE_FLOAT16_VALUE(-41.84) } }, + { SIMDE_FLOAT16_VALUE(28.80), SIMDE_FLOAT16_VALUE(25.22), SIMDE_FLOAT16_VALUE(20.66), SIMDE_FLOAT16_VALUE(48.85)}, + { { SIMDE_FLOAT16_VALUE(28.80), SIMDE_FLOAT16_VALUE(-26.33), SIMDE_FLOAT16_VALUE(-42.22), SIMDE_FLOAT16_VALUE(-34.31), + SIMDE_FLOAT16_VALUE(-14.99), SIMDE_FLOAT16_VALUE(-21.27), SIMDE_FLOAT16_VALUE(2.75), SIMDE_FLOAT16_VALUE(47.13) }, + { SIMDE_FLOAT16_VALUE(25.22), SIMDE_FLOAT16_VALUE(17.65), SIMDE_FLOAT16_VALUE(39.50), SIMDE_FLOAT16_VALUE(-42.60), + SIMDE_FLOAT16_VALUE(36.05), SIMDE_FLOAT16_VALUE(-8.32), SIMDE_FLOAT16_VALUE(-40.92), SIMDE_FLOAT16_VALUE(18.06) }, + { SIMDE_FLOAT16_VALUE(20.66), SIMDE_FLOAT16_VALUE(4.14), SIMDE_FLOAT16_VALUE(45.09), SIMDE_FLOAT16_VALUE(20.35), + SIMDE_FLOAT16_VALUE(-37.99), SIMDE_FLOAT16_VALUE(6.50), SIMDE_FLOAT16_VALUE(-31.38), SIMDE_FLOAT16_VALUE(7.92) }, + { SIMDE_FLOAT16_VALUE(48.85), SIMDE_FLOAT16_VALUE(-28.22), SIMDE_FLOAT16_VALUE(-2.20), SIMDE_FLOAT16_VALUE(-44.73), + SIMDE_FLOAT16_VALUE(-39.75), SIMDE_FLOAT16_VALUE(48.64), SIMDE_FLOAT16_VALUE(-28.85), SIMDE_FLOAT16_VALUE(-41.84) } } }, + { { { SIMDE_FLOAT16_VALUE(42.30), SIMDE_FLOAT16_VALUE(4.65), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-43.56), + SIMDE_FLOAT16_VALUE(46.24), SIMDE_FLOAT16_VALUE(23.63), SIMDE_FLOAT16_VALUE(-33.71), SIMDE_FLOAT16_VALUE(-41.85) }, + { SIMDE_FLOAT16_VALUE(46.94), SIMDE_FLOAT16_VALUE(30.22), SIMDE_FLOAT16_VALUE(3.71), 
SIMDE_FLOAT16_VALUE(1.50), + SIMDE_FLOAT16_VALUE(-25.07), SIMDE_FLOAT16_VALUE(42.19), SIMDE_FLOAT16_VALUE(-19.27), SIMDE_FLOAT16_VALUE(-23.97) }, + { SIMDE_FLOAT16_VALUE(-27.49), SIMDE_FLOAT16_VALUE(41.45), SIMDE_FLOAT16_VALUE(43.62), SIMDE_FLOAT16_VALUE(28.98), + SIMDE_FLOAT16_VALUE(-2.13), SIMDE_FLOAT16_VALUE(24.85), SIMDE_FLOAT16_VALUE(-34.40), SIMDE_FLOAT16_VALUE(-36.80) }, + { SIMDE_FLOAT16_VALUE(9.29), SIMDE_FLOAT16_VALUE(19.68), SIMDE_FLOAT16_VALUE(-47.75), SIMDE_FLOAT16_VALUE(7.71), + SIMDE_FLOAT16_VALUE(-15.37), SIMDE_FLOAT16_VALUE(-22.42), SIMDE_FLOAT16_VALUE(-24.53), SIMDE_FLOAT16_VALUE(-31.46) } }, + { SIMDE_FLOAT16_VALUE(6.38), SIMDE_FLOAT16_VALUE(-5.55), SIMDE_FLOAT16_VALUE(14.51), SIMDE_FLOAT16_VALUE(22.81)}, + { { SIMDE_FLOAT16_VALUE(42.30), SIMDE_FLOAT16_VALUE(6.38), SIMDE_FLOAT16_VALUE(-21.20), SIMDE_FLOAT16_VALUE(-43.56), + SIMDE_FLOAT16_VALUE(46.24), SIMDE_FLOAT16_VALUE(23.63), SIMDE_FLOAT16_VALUE(-33.71), SIMDE_FLOAT16_VALUE(-41.85) }, + { SIMDE_FLOAT16_VALUE(46.94), SIMDE_FLOAT16_VALUE(-5.55), SIMDE_FLOAT16_VALUE(3.71), SIMDE_FLOAT16_VALUE(1.50), + SIMDE_FLOAT16_VALUE(-25.07), SIMDE_FLOAT16_VALUE(42.19), SIMDE_FLOAT16_VALUE(-19.27), SIMDE_FLOAT16_VALUE(-23.97) }, + { SIMDE_FLOAT16_VALUE(-27.49), SIMDE_FLOAT16_VALUE(14.51), SIMDE_FLOAT16_VALUE(43.62), SIMDE_FLOAT16_VALUE(28.98), + SIMDE_FLOAT16_VALUE(-2.13), SIMDE_FLOAT16_VALUE(24.85), SIMDE_FLOAT16_VALUE(-34.40), SIMDE_FLOAT16_VALUE(-36.80) }, + { SIMDE_FLOAT16_VALUE(9.29), SIMDE_FLOAT16_VALUE(22.81), SIMDE_FLOAT16_VALUE(-47.75), SIMDE_FLOAT16_VALUE(7.71), + SIMDE_FLOAT16_VALUE(-15.37), SIMDE_FLOAT16_VALUE(-22.42), SIMDE_FLOAT16_VALUE(-24.53), SIMDE_FLOAT16_VALUE(-31.46) } } }, + { { { SIMDE_FLOAT16_VALUE(4.77), SIMDE_FLOAT16_VALUE(19.23), SIMDE_FLOAT16_VALUE(19.73), SIMDE_FLOAT16_VALUE(-7.18), + SIMDE_FLOAT16_VALUE(-27.37), SIMDE_FLOAT16_VALUE(-20.24), SIMDE_FLOAT16_VALUE(49.18), SIMDE_FLOAT16_VALUE(-24.81) }, + { SIMDE_FLOAT16_VALUE(-11.08), SIMDE_FLOAT16_VALUE(-46.49), 
SIMDE_FLOAT16_VALUE(2.52), SIMDE_FLOAT16_VALUE(26.02), + SIMDE_FLOAT16_VALUE(24.04), SIMDE_FLOAT16_VALUE(47.33), SIMDE_FLOAT16_VALUE(-44.39), SIMDE_FLOAT16_VALUE(12.98) }, + { SIMDE_FLOAT16_VALUE(-31.55), SIMDE_FLOAT16_VALUE(14.16), SIMDE_FLOAT16_VALUE(-20.43), SIMDE_FLOAT16_VALUE(-34.07), + SIMDE_FLOAT16_VALUE(-49.98), SIMDE_FLOAT16_VALUE(10.50), SIMDE_FLOAT16_VALUE(15.65), SIMDE_FLOAT16_VALUE(-48.67) }, + { SIMDE_FLOAT16_VALUE(-11.29), SIMDE_FLOAT16_VALUE(1.88), SIMDE_FLOAT16_VALUE(-32.68), SIMDE_FLOAT16_VALUE(-24.01), + SIMDE_FLOAT16_VALUE(-1.11), SIMDE_FLOAT16_VALUE(3.86), SIMDE_FLOAT16_VALUE(27.69), SIMDE_FLOAT16_VALUE(26.05) } }, + { SIMDE_FLOAT16_VALUE(10.65), SIMDE_FLOAT16_VALUE(-21.85), SIMDE_FLOAT16_VALUE(14.55), SIMDE_FLOAT16_VALUE(6.24)}, + { { SIMDE_FLOAT16_VALUE(4.77), SIMDE_FLOAT16_VALUE(19.23), SIMDE_FLOAT16_VALUE(10.65), SIMDE_FLOAT16_VALUE(-7.18), + SIMDE_FLOAT16_VALUE(-27.37), SIMDE_FLOAT16_VALUE(-20.24), SIMDE_FLOAT16_VALUE(49.18), SIMDE_FLOAT16_VALUE(-24.81) }, + { SIMDE_FLOAT16_VALUE(-11.08), SIMDE_FLOAT16_VALUE(-46.49), SIMDE_FLOAT16_VALUE(-21.85), SIMDE_FLOAT16_VALUE(26.02), + SIMDE_FLOAT16_VALUE(24.04), SIMDE_FLOAT16_VALUE(47.33), SIMDE_FLOAT16_VALUE(-44.39), SIMDE_FLOAT16_VALUE(12.98) }, + { SIMDE_FLOAT16_VALUE(-31.55), SIMDE_FLOAT16_VALUE(14.16), SIMDE_FLOAT16_VALUE(14.55), SIMDE_FLOAT16_VALUE(-34.07), + SIMDE_FLOAT16_VALUE(-49.98), SIMDE_FLOAT16_VALUE(10.50), SIMDE_FLOAT16_VALUE(15.65), SIMDE_FLOAT16_VALUE(-48.67) }, + { SIMDE_FLOAT16_VALUE(-11.29), SIMDE_FLOAT16_VALUE(1.88), SIMDE_FLOAT16_VALUE(6.24), SIMDE_FLOAT16_VALUE(-24.01), + SIMDE_FLOAT16_VALUE(-1.11), SIMDE_FLOAT16_VALUE(3.86), SIMDE_FLOAT16_VALUE(27.69), SIMDE_FLOAT16_VALUE(26.05) } } }, + { { { SIMDE_FLOAT16_VALUE(-39.56), SIMDE_FLOAT16_VALUE(46.56), SIMDE_FLOAT16_VALUE(37.88), SIMDE_FLOAT16_VALUE(-34.85), + SIMDE_FLOAT16_VALUE(-42.50), SIMDE_FLOAT16_VALUE(47.33), SIMDE_FLOAT16_VALUE(30.39), SIMDE_FLOAT16_VALUE(26.97) }, + { SIMDE_FLOAT16_VALUE(12.13), 
SIMDE_FLOAT16_VALUE(13.90), SIMDE_FLOAT16_VALUE(-11.09), SIMDE_FLOAT16_VALUE(-46.55), + SIMDE_FLOAT16_VALUE(-25.60), SIMDE_FLOAT16_VALUE(36.32), SIMDE_FLOAT16_VALUE(38.43), SIMDE_FLOAT16_VALUE(-15.44) }, + { SIMDE_FLOAT16_VALUE(14.57), SIMDE_FLOAT16_VALUE(-33.72), SIMDE_FLOAT16_VALUE(17.48), SIMDE_FLOAT16_VALUE(-41.97), + SIMDE_FLOAT16_VALUE(39.90), SIMDE_FLOAT16_VALUE(12.95), SIMDE_FLOAT16_VALUE(-24.90), SIMDE_FLOAT16_VALUE(22.89) }, + { SIMDE_FLOAT16_VALUE(33.49), SIMDE_FLOAT16_VALUE(-29.52), SIMDE_FLOAT16_VALUE(-42.35), SIMDE_FLOAT16_VALUE(11.54), + SIMDE_FLOAT16_VALUE(-19.29), SIMDE_FLOAT16_VALUE(-3.97), SIMDE_FLOAT16_VALUE(39.49), SIMDE_FLOAT16_VALUE(34.87) } }, + { SIMDE_FLOAT16_VALUE(22.68), SIMDE_FLOAT16_VALUE(-10.73), SIMDE_FLOAT16_VALUE(1.03), SIMDE_FLOAT16_VALUE(9.92)}, + { { SIMDE_FLOAT16_VALUE(-39.56), SIMDE_FLOAT16_VALUE(46.56), SIMDE_FLOAT16_VALUE(37.88), SIMDE_FLOAT16_VALUE(22.68), + SIMDE_FLOAT16_VALUE(-42.50), SIMDE_FLOAT16_VALUE(47.33), SIMDE_FLOAT16_VALUE(30.39), SIMDE_FLOAT16_VALUE(26.97) }, + { SIMDE_FLOAT16_VALUE(12.13), SIMDE_FLOAT16_VALUE(13.90), SIMDE_FLOAT16_VALUE(-11.09), SIMDE_FLOAT16_VALUE(-10.73), + SIMDE_FLOAT16_VALUE(-25.60), SIMDE_FLOAT16_VALUE(36.32), SIMDE_FLOAT16_VALUE(38.43), SIMDE_FLOAT16_VALUE(-15.44) }, + { SIMDE_FLOAT16_VALUE(14.57), SIMDE_FLOAT16_VALUE(-33.72), SIMDE_FLOAT16_VALUE(17.48), SIMDE_FLOAT16_VALUE(1.03), + SIMDE_FLOAT16_VALUE(39.90), SIMDE_FLOAT16_VALUE(12.95), SIMDE_FLOAT16_VALUE(-24.90), SIMDE_FLOAT16_VALUE(22.89) }, + { SIMDE_FLOAT16_VALUE(33.49), SIMDE_FLOAT16_VALUE(-29.52), SIMDE_FLOAT16_VALUE(-42.35), SIMDE_FLOAT16_VALUE(9.92), + SIMDE_FLOAT16_VALUE(-19.29), SIMDE_FLOAT16_VALUE(-3.97), SIMDE_FLOAT16_VALUE(39.49), SIMDE_FLOAT16_VALUE(34.87) } } }, + { { { SIMDE_FLOAT16_VALUE(-8.17), SIMDE_FLOAT16_VALUE(20.98), SIMDE_FLOAT16_VALUE(-36.48), SIMDE_FLOAT16_VALUE(-2.58), + SIMDE_FLOAT16_VALUE(45.57), SIMDE_FLOAT16_VALUE(1.57), SIMDE_FLOAT16_VALUE(-6.19), SIMDE_FLOAT16_VALUE(-12.44) }, + { 
SIMDE_FLOAT16_VALUE(45.85), SIMDE_FLOAT16_VALUE(24.95), SIMDE_FLOAT16_VALUE(28.89), SIMDE_FLOAT16_VALUE(49.69), + SIMDE_FLOAT16_VALUE(1.21), SIMDE_FLOAT16_VALUE(-33.38), SIMDE_FLOAT16_VALUE(-40.61), SIMDE_FLOAT16_VALUE(21.68) }, + { SIMDE_FLOAT16_VALUE(46.23), SIMDE_FLOAT16_VALUE(-16.19), SIMDE_FLOAT16_VALUE(-35.91), SIMDE_FLOAT16_VALUE(43.87), + SIMDE_FLOAT16_VALUE(30.36), SIMDE_FLOAT16_VALUE(-1.03), SIMDE_FLOAT16_VALUE(19.56), SIMDE_FLOAT16_VALUE(33.86) }, + { SIMDE_FLOAT16_VALUE(-24.84), SIMDE_FLOAT16_VALUE(13.50), SIMDE_FLOAT16_VALUE(29.52), SIMDE_FLOAT16_VALUE(41.09), + SIMDE_FLOAT16_VALUE(-18.40), SIMDE_FLOAT16_VALUE(40.70), SIMDE_FLOAT16_VALUE(0.08), SIMDE_FLOAT16_VALUE(19.83) } }, + { SIMDE_FLOAT16_VALUE(38.14), SIMDE_FLOAT16_VALUE(-31.15), SIMDE_FLOAT16_VALUE(18.94), SIMDE_FLOAT16_VALUE(21.21)}, + { { SIMDE_FLOAT16_VALUE(-8.17), SIMDE_FLOAT16_VALUE(20.98), SIMDE_FLOAT16_VALUE(-36.48), SIMDE_FLOAT16_VALUE(-2.58), + SIMDE_FLOAT16_VALUE(38.14), SIMDE_FLOAT16_VALUE(1.57), SIMDE_FLOAT16_VALUE(-6.19), SIMDE_FLOAT16_VALUE(-12.44) }, + { SIMDE_FLOAT16_VALUE(45.85), SIMDE_FLOAT16_VALUE(24.95), SIMDE_FLOAT16_VALUE(28.89), SIMDE_FLOAT16_VALUE(49.69), + SIMDE_FLOAT16_VALUE(-31.15), SIMDE_FLOAT16_VALUE(-33.38), SIMDE_FLOAT16_VALUE(-40.61), SIMDE_FLOAT16_VALUE(21.68) }, + { SIMDE_FLOAT16_VALUE(46.23), SIMDE_FLOAT16_VALUE(-16.19), SIMDE_FLOAT16_VALUE(-35.91), SIMDE_FLOAT16_VALUE(43.87), + SIMDE_FLOAT16_VALUE(18.94), SIMDE_FLOAT16_VALUE(-1.03), SIMDE_FLOAT16_VALUE(19.56), SIMDE_FLOAT16_VALUE(33.86) }, + { SIMDE_FLOAT16_VALUE(-24.84), SIMDE_FLOAT16_VALUE(13.50), SIMDE_FLOAT16_VALUE(29.52), SIMDE_FLOAT16_VALUE(41.09), + SIMDE_FLOAT16_VALUE(21.21), SIMDE_FLOAT16_VALUE(40.70), SIMDE_FLOAT16_VALUE(0.08), SIMDE_FLOAT16_VALUE(19.83) } } }, + { { { SIMDE_FLOAT16_VALUE(32.92), SIMDE_FLOAT16_VALUE(-0.80), SIMDE_FLOAT16_VALUE(-42.65), SIMDE_FLOAT16_VALUE(-15.22), + SIMDE_FLOAT16_VALUE(-16.39), SIMDE_FLOAT16_VALUE(11.52), SIMDE_FLOAT16_VALUE(-4.14), SIMDE_FLOAT16_VALUE(8.87) }, + { 
SIMDE_FLOAT16_VALUE(-35.81), SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(-12.60), SIMDE_FLOAT16_VALUE(21.59), + SIMDE_FLOAT16_VALUE(-11.53), SIMDE_FLOAT16_VALUE(44.02), SIMDE_FLOAT16_VALUE(30.30), SIMDE_FLOAT16_VALUE(43.42) }, + { SIMDE_FLOAT16_VALUE(16.67), SIMDE_FLOAT16_VALUE(0.23), SIMDE_FLOAT16_VALUE(-4.88), SIMDE_FLOAT16_VALUE(-5.00), + SIMDE_FLOAT16_VALUE(36.67), SIMDE_FLOAT16_VALUE(43.56), SIMDE_FLOAT16_VALUE(-15.15), SIMDE_FLOAT16_VALUE(-34.82) }, + { SIMDE_FLOAT16_VALUE(-22.22), SIMDE_FLOAT16_VALUE(-41.78), SIMDE_FLOAT16_VALUE(-15.90), SIMDE_FLOAT16_VALUE(-36.65), + SIMDE_FLOAT16_VALUE(34.10), SIMDE_FLOAT16_VALUE(22.36), SIMDE_FLOAT16_VALUE(-4.57), SIMDE_FLOAT16_VALUE(-42.90) } }, + { SIMDE_FLOAT16_VALUE(-30.23), SIMDE_FLOAT16_VALUE(-33.22), SIMDE_FLOAT16_VALUE(14.26), SIMDE_FLOAT16_VALUE(-48.15)}, + { { SIMDE_FLOAT16_VALUE(32.92), SIMDE_FLOAT16_VALUE(-0.80), SIMDE_FLOAT16_VALUE(-42.65), SIMDE_FLOAT16_VALUE(-15.22), + SIMDE_FLOAT16_VALUE(-16.39), SIMDE_FLOAT16_VALUE(-30.23), SIMDE_FLOAT16_VALUE(-4.14), SIMDE_FLOAT16_VALUE(8.87) }, + { SIMDE_FLOAT16_VALUE(-35.81), SIMDE_FLOAT16_VALUE(12.74), SIMDE_FLOAT16_VALUE(-12.60), SIMDE_FLOAT16_VALUE(21.59), + SIMDE_FLOAT16_VALUE(-11.53), SIMDE_FLOAT16_VALUE(-33.22), SIMDE_FLOAT16_VALUE(30.30), SIMDE_FLOAT16_VALUE(43.42) }, + { SIMDE_FLOAT16_VALUE(16.67), SIMDE_FLOAT16_VALUE(0.23), SIMDE_FLOAT16_VALUE(-4.88), SIMDE_FLOAT16_VALUE(-5.00), + SIMDE_FLOAT16_VALUE(36.67), SIMDE_FLOAT16_VALUE(14.26), SIMDE_FLOAT16_VALUE(-15.15), SIMDE_FLOAT16_VALUE(-34.82) }, + { SIMDE_FLOAT16_VALUE(-22.22), SIMDE_FLOAT16_VALUE(-41.78), SIMDE_FLOAT16_VALUE(-15.90), SIMDE_FLOAT16_VALUE(-36.65), + SIMDE_FLOAT16_VALUE(34.10), SIMDE_FLOAT16_VALUE(-48.15), SIMDE_FLOAT16_VALUE(-4.57), SIMDE_FLOAT16_VALUE(-42.90) } } }, + { { { SIMDE_FLOAT16_VALUE(30.57), SIMDE_FLOAT16_VALUE(-2.63), SIMDE_FLOAT16_VALUE(-44.63), SIMDE_FLOAT16_VALUE(15.88), + SIMDE_FLOAT16_VALUE(10.45), SIMDE_FLOAT16_VALUE(45.63), SIMDE_FLOAT16_VALUE(9.06), 
SIMDE_FLOAT16_VALUE(-38.74) }, + { SIMDE_FLOAT16_VALUE(-26.14), SIMDE_FLOAT16_VALUE(24.78), SIMDE_FLOAT16_VALUE(19.94), SIMDE_FLOAT16_VALUE(-27.09), + SIMDE_FLOAT16_VALUE(42.67), SIMDE_FLOAT16_VALUE(1.81), SIMDE_FLOAT16_VALUE(16.33), SIMDE_FLOAT16_VALUE(-11.55) }, + { SIMDE_FLOAT16_VALUE(-48.80), SIMDE_FLOAT16_VALUE(16.42), SIMDE_FLOAT16_VALUE(-47.87), SIMDE_FLOAT16_VALUE(-31.96), + SIMDE_FLOAT16_VALUE(22.97), SIMDE_FLOAT16_VALUE(-48.97), SIMDE_FLOAT16_VALUE(-19.78), SIMDE_FLOAT16_VALUE(-5.38) }, + { SIMDE_FLOAT16_VALUE(-15.38), SIMDE_FLOAT16_VALUE(21.21), SIMDE_FLOAT16_VALUE(23.35), SIMDE_FLOAT16_VALUE(0.40), + SIMDE_FLOAT16_VALUE(25.56), SIMDE_FLOAT16_VALUE(31.88), SIMDE_FLOAT16_VALUE(34.93), SIMDE_FLOAT16_VALUE(30.41) } }, + { SIMDE_FLOAT16_VALUE(-17.21), SIMDE_FLOAT16_VALUE(-48.48), SIMDE_FLOAT16_VALUE(-7.06), SIMDE_FLOAT16_VALUE(-26.40)}, + { { SIMDE_FLOAT16_VALUE(30.57), SIMDE_FLOAT16_VALUE(-2.63), SIMDE_FLOAT16_VALUE(-44.63), SIMDE_FLOAT16_VALUE(15.88), + SIMDE_FLOAT16_VALUE(10.45), SIMDE_FLOAT16_VALUE(45.63), SIMDE_FLOAT16_VALUE(-17.21), SIMDE_FLOAT16_VALUE(-38.74) }, + { SIMDE_FLOAT16_VALUE(-26.14), SIMDE_FLOAT16_VALUE(24.78), SIMDE_FLOAT16_VALUE(19.94), SIMDE_FLOAT16_VALUE(-27.09), + SIMDE_FLOAT16_VALUE(42.67), SIMDE_FLOAT16_VALUE(1.81), SIMDE_FLOAT16_VALUE(-48.48), SIMDE_FLOAT16_VALUE(-11.55) }, + { SIMDE_FLOAT16_VALUE(-48.80), SIMDE_FLOAT16_VALUE(16.42), SIMDE_FLOAT16_VALUE(-47.87), SIMDE_FLOAT16_VALUE(-31.96), + SIMDE_FLOAT16_VALUE(22.97), SIMDE_FLOAT16_VALUE(-48.97), SIMDE_FLOAT16_VALUE(-7.06), SIMDE_FLOAT16_VALUE(-5.38) }, + { SIMDE_FLOAT16_VALUE(-15.38), SIMDE_FLOAT16_VALUE(21.21), SIMDE_FLOAT16_VALUE(23.35), SIMDE_FLOAT16_VALUE(0.40), + SIMDE_FLOAT16_VALUE(25.56), SIMDE_FLOAT16_VALUE(31.88), SIMDE_FLOAT16_VALUE(-26.40), SIMDE_FLOAT16_VALUE(30.41) } } }, + { { { SIMDE_FLOAT16_VALUE(9.17), SIMDE_FLOAT16_VALUE(21.02), SIMDE_FLOAT16_VALUE(-31.99), SIMDE_FLOAT16_VALUE(40.72), + SIMDE_FLOAT16_VALUE(-48.40), SIMDE_FLOAT16_VALUE(-16.85), 
SIMDE_FLOAT16_VALUE(-28.29), SIMDE_FLOAT16_VALUE(-25.87) }, + { SIMDE_FLOAT16_VALUE(10.04), SIMDE_FLOAT16_VALUE(11.19), SIMDE_FLOAT16_VALUE(-9.39), SIMDE_FLOAT16_VALUE(43.50), + SIMDE_FLOAT16_VALUE(20.72), SIMDE_FLOAT16_VALUE(16.81), SIMDE_FLOAT16_VALUE(31.83), SIMDE_FLOAT16_VALUE(24.46) }, + { SIMDE_FLOAT16_VALUE(-6.31), SIMDE_FLOAT16_VALUE(-39.46), SIMDE_FLOAT16_VALUE(-33.35), SIMDE_FLOAT16_VALUE(11.15), + SIMDE_FLOAT16_VALUE(-7.05), SIMDE_FLOAT16_VALUE(26.22), SIMDE_FLOAT16_VALUE(-36.64), SIMDE_FLOAT16_VALUE(37.19) }, + { SIMDE_FLOAT16_VALUE(13.89), SIMDE_FLOAT16_VALUE(26.04), SIMDE_FLOAT16_VALUE(-44.10), SIMDE_FLOAT16_VALUE(37.22), + SIMDE_FLOAT16_VALUE(10.55), SIMDE_FLOAT16_VALUE(-21.52), SIMDE_FLOAT16_VALUE(-10.01), SIMDE_FLOAT16_VALUE(16.56) } }, + { SIMDE_FLOAT16_VALUE(-30.72), SIMDE_FLOAT16_VALUE(-43.73), SIMDE_FLOAT16_VALUE(28.26), SIMDE_FLOAT16_VALUE(-33.82)}, + { { SIMDE_FLOAT16_VALUE(9.17), SIMDE_FLOAT16_VALUE(21.02), SIMDE_FLOAT16_VALUE(-31.99), SIMDE_FLOAT16_VALUE(40.72), + SIMDE_FLOAT16_VALUE(-48.40), SIMDE_FLOAT16_VALUE(-16.85), SIMDE_FLOAT16_VALUE(-28.29), SIMDE_FLOAT16_VALUE(-30.72) }, + { SIMDE_FLOAT16_VALUE(10.04), SIMDE_FLOAT16_VALUE(11.19), SIMDE_FLOAT16_VALUE(-9.39), SIMDE_FLOAT16_VALUE(43.50), + SIMDE_FLOAT16_VALUE(20.72), SIMDE_FLOAT16_VALUE(16.81), SIMDE_FLOAT16_VALUE(31.83), SIMDE_FLOAT16_VALUE(-43.73) }, + { SIMDE_FLOAT16_VALUE(-6.31), SIMDE_FLOAT16_VALUE(-39.46), SIMDE_FLOAT16_VALUE(-33.35), SIMDE_FLOAT16_VALUE(11.15), + SIMDE_FLOAT16_VALUE(-7.05), SIMDE_FLOAT16_VALUE(26.22), SIMDE_FLOAT16_VALUE(-36.64), SIMDE_FLOAT16_VALUE(28.26) }, + { SIMDE_FLOAT16_VALUE(13.89), SIMDE_FLOAT16_VALUE(26.04), SIMDE_FLOAT16_VALUE(-44.10), SIMDE_FLOAT16_VALUE(37.22), + SIMDE_FLOAT16_VALUE(10.55), SIMDE_FLOAT16_VALUE(-21.52), SIMDE_FLOAT16_VALUE(-10.01), SIMDE_FLOAT16_VALUE(-33.82) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x4_t r; + simde_float16x8x4_t src = { + 
{simde_vld1q_f16(test_vec[i].src[0]), simde_vld1q_f16(test_vec[i].src[1]), + simde_vld1q_f16(test_vec[i].src[2]), simde_vld1q_f16(test_vec[i].src[3])}}; + simde_float16x8x4_t expected = { + {simde_vld1q_f16(test_vec[i].r[0]), simde_vld1q_f16(test_vec[i].r[1]), + simde_vld1q_f16(test_vec[i].r[2]), simde_vld1q_f16(test_vec[i].r[3])}}; + + SIMDE_CONSTIFY_8_(simde_vld4q_lane_f16, r, (HEDLEY_UNREACHABLE(), r), i, test_vec[i].buf, src); + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], 1); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], 1); + simde_test_arm_neon_assert_equal_f16x8(r.val[2], expected.val[2], 1); + simde_test_arm_neon_assert_equal_f16x8(r.val[3], expected.val[3], 1); + } + + return 0; +} + static int test_simde_vld4q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1877,6 +2107,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vld4_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_s8) @@ -1887,6 +2118,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_lane_f64) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ diff --git a/test/arm/neon/mla_lane.c b/test/arm/neon/mla_lane.c index 458c03df1..d21b298a9 100644 --- a/test/arm/neon/mla_lane.c +++ b/test/arm/neon/mla_lane.c @@ -6,6 +6,77 @@ SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +static int +test_simde_vmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[2]; + float b[2]; + float v[4]; + int8_t lane; + float r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -73.46), SIMDE_FLOAT32_C( 513.31) }, 
+ { SIMDE_FLOAT32_C( 859.54), SIMDE_FLOAT32_C( 351.95) }, + { SIMDE_FLOAT32_C( 623.37), SIMDE_FLOAT32_C( -773.90), SIMDE_FLOAT32_C( -85.33), SIMDE_FLOAT32_C( -870.53) }, + INT8_C( 1), + { SIMDE_FLOAT32_C(-665271.47), SIMDE_FLOAT32_C(-271860.79) } }, + { { SIMDE_FLOAT32_C( -274.57), SIMDE_FLOAT32_C( 808.41) }, + { SIMDE_FLOAT32_C( -90.97), SIMDE_FLOAT32_C( -714.05) }, + { SIMDE_FLOAT32_C( -922.73), SIMDE_FLOAT32_C( -411.88), SIMDE_FLOAT32_C( 281.90), SIMDE_FLOAT32_C( -63.90) }, + INT8_C( 2), + { SIMDE_FLOAT32_C( -25919.01), SIMDE_FLOAT32_C(-200482.28) } }, + { { SIMDE_FLOAT32_C( -741.09), SIMDE_FLOAT32_C( 445.38) }, + { SIMDE_FLOAT32_C( -398.05), SIMDE_FLOAT32_C( -855.86) }, + { SIMDE_FLOAT32_C( -570.81), SIMDE_FLOAT32_C( -970.95), SIMDE_FLOAT32_C( -485.13), SIMDE_FLOAT32_C( -172.61) }, + INT8_C( 3), + { SIMDE_FLOAT32_C( 67966.32), SIMDE_FLOAT32_C( 148175.37) } }, + { { SIMDE_FLOAT32_C( 33.94), SIMDE_FLOAT32_C( 293.45) }, + { SIMDE_FLOAT32_C( -588.66), SIMDE_FLOAT32_C( 519.96) }, + { SIMDE_FLOAT32_C( -139.60), SIMDE_FLOAT32_C( 433.70), SIMDE_FLOAT32_C( 837.57), SIMDE_FLOAT32_C( -714.84) }, + INT8_C( 3), + { SIMDE_FLOAT32_C( 420831.65), SIMDE_FLOAT32_C(-371394.76) } }, + { { SIMDE_FLOAT32_C( -795.09), SIMDE_FLOAT32_C( 16.24) }, + { SIMDE_FLOAT32_C( -182.80), SIMDE_FLOAT32_C( 647.98) }, + { SIMDE_FLOAT32_C( 196.21), SIMDE_FLOAT32_C( 269.55), SIMDE_FLOAT32_C( 855.47), SIMDE_FLOAT32_C( -864.67) }, + INT8_C( 2), + { SIMDE_FLOAT32_C(-157175.01), SIMDE_FLOAT32_C( 554343.69) } }, + { { SIMDE_FLOAT32_C( -464.62), SIMDE_FLOAT32_C( 878.96) }, + { SIMDE_FLOAT32_C( 482.06), SIMDE_FLOAT32_C( -189.13) }, + { SIMDE_FLOAT32_C( 998.51), SIMDE_FLOAT32_C( 249.98), SIMDE_FLOAT32_C( -423.89), SIMDE_FLOAT32_C( 412.87) }, + INT8_C( 2), + { SIMDE_FLOAT32_C(-204805.03), SIMDE_FLOAT32_C( 81049.28) } }, + { { SIMDE_FLOAT32_C( -949.96), SIMDE_FLOAT32_C( 459.88) }, + { SIMDE_FLOAT32_C( -612.87), SIMDE_FLOAT32_C( 624.30) }, + { SIMDE_FLOAT32_C( 216.59), SIMDE_FLOAT32_C( 639.02), SIMDE_FLOAT32_C( 
-234.22), SIMDE_FLOAT32_C( -127.73) }, + INT8_C( 3), + { SIMDE_FLOAT32_C( 77331.93), SIMDE_FLOAT32_C( -79281.96) } }, + { { SIMDE_FLOAT32_C( -495.83), SIMDE_FLOAT32_C( -373.57) }, + { SIMDE_FLOAT32_C( -270.56), SIMDE_FLOAT32_C( -712.92) }, + { SIMDE_FLOAT32_C( 565.27), SIMDE_FLOAT32_C( 939.53), SIMDE_FLOAT32_C( 228.62), SIMDE_FLOAT32_C( -248.99) }, + INT8_C( 1), + { SIMDE_FLOAT32_C(-254695.07), SIMDE_FLOAT32_C(-670183.30) } }, + }; + + simde_float32x2_t r, a, b; + simde_float32x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_f32(test_vec[i].a); + b = simde_vld1_f32(test_vec[i].b); + v = simde_vld1q_f32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_f32(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_f32(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_f32(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_f32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_f32(0); break; + } + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -94,6 +165,89 @@ test_simde_vmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmla_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t b[4]; + int16_t v[8]; + int8_t lane; + int16_t r[4]; + } test_vec[] = { + { { -INT16_C( 81), -INT16_C( 48), INT16_C( 94), INT16_C( 69) }, + { INT16_C( 40), -INT16_C( 89), -INT16_C( 92), INT16_C( 90) }, + { INT16_C( 94), INT16_C( 38), -INT16_C( 15), -INT16_C( 13), + -INT16_C( 79), -INT16_C( 41), -INT16_C( 66), INT16_C( 0) }, + INT8_C( 0), + { INT16_C( 3679), -INT16_C( 8414), -INT16_C( 8554), INT16_C( 8529) } }, + { { -INT16_C( 14), -INT16_C( 78), -INT16_C( 58), -INT16_C( 39) }, + { -INT16_C( 85), -INT16_C( 52), INT16_C( 85), INT16_C( 77) }, + { INT16_C( 76), -INT16_C( 10), INT16_C( 26), INT16_C( 82), + -INT16_C( 46), INT16_C( 48), -INT16_C( 40), INT16_C( 93) }, + INT8_C( 
6), + { INT16_C( 3386), INT16_C( 2002), -INT16_C( 3458), -INT16_C( 3119) } }, + { { INT16_C( 82), -INT16_C( 97), INT16_C( 61), INT16_C( 86) }, + { INT16_C( 78), -INT16_C( 66), INT16_C( 69), -INT16_C( 34) }, + { INT16_C( 79), -INT16_C( 74), INT16_C( 4), -INT16_C( 49), + INT16_C( 22), -INT16_C( 87), -INT16_C( 48), -INT16_C( 37) }, + INT8_C( 0), + { INT16_C( 6244), -INT16_C( 5311), INT16_C( 5512), -INT16_C( 2600) } }, + { { -INT16_C( 3), -INT16_C( 26), INT16_C( 68), INT16_C( 10) }, + { INT16_C( 9), -INT16_C( 39), INT16_C( 24), -INT16_C( 13) }, + { -INT16_C( 39), -INT16_C( 87), -INT16_C( 79), INT16_C( 42), + -INT16_C( 23), INT16_C( 56), -INT16_C( 22), -INT16_C( 41) }, + INT8_C( 7), + { -INT16_C( 372), INT16_C( 1573), -INT16_C( 916), INT16_C( 543) } }, + { { INT16_C( 0), -INT16_C( 60), -INT16_C( 14), -INT16_C( 40) }, + { -INT16_C( 46), -INT16_C( 13), INT16_C( 45), -INT16_C( 6) }, + { -INT16_C( 39), INT16_C( 97), INT16_C( 59), -INT16_C( 65), + -INT16_C( 22), -INT16_C( 35), -INT16_C( 62), INT16_C( 51) }, + INT8_C( 0), + { INT16_C( 1794), INT16_C( 447), -INT16_C( 1769), INT16_C( 194) } }, + { { -INT16_C( 72), INT16_C( 43), INT16_C( 0), -INT16_C( 15) }, + { -INT16_C( 96), INT16_C( 98), -INT16_C( 5), INT16_C( 88) }, + { INT16_C( 70), INT16_C( 21), -INT16_C( 54), -INT16_C( 29), + INT16_C( 83), INT16_C( 62), INT16_C( 7), -INT16_C( 7) }, + INT8_C( 7), + { INT16_C( 600), -INT16_C( 643), INT16_C( 35), -INT16_C( 631) } }, + { { -INT16_C( 57), -INT16_C( 65), INT16_C( 87), INT16_C( 11) }, + { -INT16_C( 36), -INT16_C( 26), INT16_C( 46), INT16_C( 29) }, + { -INT16_C( 82), INT16_C( 96), INT16_C( 14), INT16_C( 54), + -INT16_C( 42), INT16_C( 5), -INT16_C( 47), -INT16_C( 37) }, + INT8_C( 5), + { -INT16_C( 237), -INT16_C( 195), INT16_C( 317), INT16_C( 156) } }, + { { INT16_C( 46), -INT16_C( 88), -INT16_C( 14), -INT16_C( 73) }, + { -INT16_C( 12), INT16_C( 31), INT16_C( 0), -INT16_C( 48) }, + { -INT16_C( 96), -INT16_C( 87), -INT16_C( 55), -INT16_C( 93), + -INT16_C( 55), -INT16_C( 34), 
INT16_C( 63), INT16_C( 65) }, + INT8_C( 1), + { INT16_C( 1090), -INT16_C( 2785), -INT16_C( 14), INT16_C( 4103) } }, + }; + + simde_int16x4_t r, a, b; + simde_int16x8_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_s16(test_vec[i].a); + b = simde_vld1_s16(test_vec[i].b); + v = simde_vld1q_s16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vmla_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vmla_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vmla_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vmla_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s16(0); break; + } + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; +} + static int test_simde_vmla_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -184,6 +338,77 @@ test_simde_vmla_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmla_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t b[2]; + int32_t v[4]; + int8_t lane; + int32_t r[2]; + } test_vec[] = { + { { INT32_C( 3861), INT32_C( 5159) }, + { INT32_C( 9760), -INT32_C( 3715) }, + { -INT32_C( 9595), -INT32_C( 2488), INT32_C( 4866), -INT32_C( 5704) }, + INT8_C( 2), + { INT32_C( 47496021), -INT32_C( 18072031) } }, + { { INT32_C( 5007), -INT32_C( 7329) }, + { -INT32_C( 6236), INT32_C( 6988) }, + { -INT32_C( 612), INT32_C( 5079), -INT32_C( 8538), -INT32_C( 3459) }, + INT8_C( 1), + { -INT32_C( 31667637), INT32_C( 35484723) } }, + { { INT32_C( 1305), -INT32_C( 87) }, + { -INT32_C( 3936), INT32_C( 9528) }, + { INT32_C( 7150), -INT32_C( 961), INT32_C( 3279), -INT32_C( 1449) }, + INT8_C( 1), + { INT32_C( 3783801), -INT32_C( 9156495) } }, + { { -INT32_C( 1929), -INT32_C( 1565) }, + { INT32_C( 3306), 
-INT32_C( 8150) }, + { -INT32_C( 4113), -INT32_C( 7811), -INT32_C( 7275), INT32_C( 2429) }, + INT8_C( 2), + { -INT32_C( 24053079), INT32_C( 59289685) } }, + { { INT32_C( 3468), INT32_C( 5295) }, + { INT32_C( 1666), -INT32_C( 3947) }, + { INT32_C( 6721), -INT32_C( 1160), INT32_C( 6628), INT32_C( 2152) }, + INT8_C( 0), + { INT32_C( 11200654), -INT32_C( 26522492) } }, + { { INT32_C( 4696), INT32_C( 717) }, + { -INT32_C( 7531), -INT32_C( 1614) }, + { INT32_C( 129), -INT32_C( 7076), -INT32_C( 1025), -INT32_C( 5723) }, + INT8_C( 3), + { INT32_C( 43104609), INT32_C( 9237639) } }, + { { -INT32_C( 764), INT32_C( 296) }, + { -INT32_C( 8065), INT32_C( 1700) }, + { INT32_C( 8396), -INT32_C( 25), INT32_C( 6189), INT32_C( 6350) }, + INT8_C( 3), + { -INT32_C( 51213514), INT32_C( 10795296) } }, + { { -INT32_C( 9807), -INT32_C( 6609) }, + { -INT32_C( 9863), INT32_C( 1251) }, + { INT32_C( 5518), INT32_C( 9557), -INT32_C( 3314), -INT32_C( 1340) }, + INT8_C( 1), + { -INT32_C( 94270498), INT32_C( 11949198) } }, + }; + + simde_int32x2_t r, a, b; + simde_int32x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_s32(test_vec[i].a); + b = simde_vld1_s32(test_vec[i].b); + v = simde_vld1q_s32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s32(0); break; + } + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; +} + static int test_simde_vmla_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -273,6 +498,89 @@ test_simde_vmla_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmla_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t b[4]; + uint16_t v[8]; + int8_t lane; + uint16_t r[4]; + } test_vec[] = { + { { UINT16_C( 144), UINT16_C( 
34), UINT16_C( 24), UINT16_C( 53) }, + { UINT16_C( 114), UINT16_C( 151), UINT16_C( 196), UINT16_C( 168) }, + { UINT16_C( 9), UINT16_C( 121), UINT16_C( 98), UINT16_C( 37), + UINT16_C( 85), UINT16_C( 143), UINT16_C( 114), UINT16_C( 77) }, + INT8_C( 7), + { UINT16_C( 8922), UINT16_C( 11661), UINT16_C( 15116), UINT16_C( 12989) } }, + { { UINT16_C( 46), UINT16_C( 26), UINT16_C( 172), UINT16_C( 157) }, + { UINT16_C( 130), UINT16_C( 10), UINT16_C( 72), UINT16_C( 144) }, + { UINT16_C( 79), UINT16_C( 48), UINT16_C( 31), UINT16_C( 47), + UINT16_C( 141), UINT16_C( 75), UINT16_C( 11), UINT16_C( 0) }, + INT8_C( 4), + { UINT16_C( 18376), UINT16_C( 1436), UINT16_C( 10324), UINT16_C( 20461) } }, + { { UINT16_C( 66), UINT16_C( 115), UINT16_C( 17), UINT16_C( 182) }, + { UINT16_C( 171), UINT16_C( 62), UINT16_C( 79), UINT16_C( 160) }, + { UINT16_C( 149), UINT16_C( 107), UINT16_C( 55), UINT16_C( 109), + UINT16_C( 135), UINT16_C( 105), UINT16_C( 116), UINT16_C( 12) }, + INT8_C( 6), + { UINT16_C( 19902), UINT16_C( 7307), UINT16_C( 9181), UINT16_C( 18742) } }, + { { UINT16_C( 162), UINT16_C( 21), UINT16_C( 179), UINT16_C( 140) }, + { UINT16_C( 116), UINT16_C( 164), UINT16_C( 124), UINT16_C( 121) }, + { UINT16_C( 70), UINT16_C( 9), UINT16_C( 107), UINT16_C( 156), + UINT16_C( 122), UINT16_C( 174), UINT16_C( 56), UINT16_C( 136) }, + INT8_C( 4), + { UINT16_C( 14314), UINT16_C( 20029), UINT16_C( 15307), UINT16_C( 14902) } }, + { { UINT16_C( 156), UINT16_C( 33), UINT16_C( 163), UINT16_C( 127) }, + { UINT16_C( 32), UINT16_C( 12), UINT16_C( 90), UINT16_C( 82) }, + { UINT16_C( 97), UINT16_C( 190), UINT16_C( 0), UINT16_C( 43), + UINT16_C( 37), UINT16_C( 42), UINT16_C( 81), UINT16_C( 151) }, + INT8_C( 6), + { UINT16_C( 2748), UINT16_C( 1005), UINT16_C( 7453), UINT16_C( 6769) } }, + { { UINT16_C( 86), UINT16_C( 68), UINT16_C( 183), UINT16_C( 186) }, + { UINT16_C( 7), UINT16_C( 118), UINT16_C( 191), UINT16_C( 83) }, + { UINT16_C( 139), UINT16_C( 109), UINT16_C( 165), UINT16_C( 138), + UINT16_C( 63), 
UINT16_C( 145), UINT16_C( 122), UINT16_C( 104) }, + INT8_C( 5), + { UINT16_C( 1101), UINT16_C( 17178), UINT16_C( 27878), UINT16_C( 12221) } }, + { { UINT16_C( 66), UINT16_C( 140), UINT16_C( 133), UINT16_C( 129) }, + { UINT16_C( 164), UINT16_C( 44), UINT16_C( 50), UINT16_C( 9) }, + { UINT16_C( 4), UINT16_C( 13), UINT16_C( 156), UINT16_C( 115), + UINT16_C( 28), UINT16_C( 5), UINT16_C( 57), UINT16_C( 186) }, + INT8_C( 5), + { UINT16_C( 886), UINT16_C( 360), UINT16_C( 383), UINT16_C( 174) } }, + { { UINT16_C( 147), UINT16_C( 6), UINT16_C( 29), UINT16_C( 23) }, + { UINT16_C( 196), UINT16_C( 120), UINT16_C( 91), UINT16_C( 101) }, + { UINT16_C( 183), UINT16_C( 144), UINT16_C( 66), UINT16_C( 70), + UINT16_C( 187), UINT16_C( 197), UINT16_C( 140), UINT16_C( 144) }, + INT8_C( 6), + { UINT16_C( 27587), UINT16_C( 16806), UINT16_C( 12769), UINT16_C( 14163) } }, + }; + + simde_uint16x4_t r, a, b; + simde_uint16x8_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_u16(test_vec[i].a); + b = simde_vld1_u16(test_vec[i].b); + v = simde_vld1q_u16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_u16(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_u16(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_u16(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_u16(a, b, v, 3); break; + case 4: r = simde_vmla_laneq_u16(a, b, v, 4); break; + case 5: r = simde_vmla_laneq_u16(a, b, v, 5); break; + case 6: r = simde_vmla_laneq_u16(a, b, v, 6); break; + case 7: r = simde_vmla_laneq_u16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u16(0); break; + } + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; +} + static int test_simde_vmla_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -363,6 +671,77 @@ test_simde_vmla_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmla_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t b[2]; + uint32_t v[4]; + int8_t 
lane; + uint32_t r[2]; + } test_vec[] = { + { { UINT32_C( 779), UINT32_C( 9897) }, + { UINT32_C( 19930), UINT32_C( 9507) }, + { UINT32_C( 6030), UINT32_C( 14920), UINT32_C( 631), UINT32_C( 3218) }, + INT8_C( 0), + { UINT32_C(120178679), UINT32_C( 57337107) } }, + { { UINT32_C( 7008), UINT32_C( 13810) }, + { UINT32_C( 4541), UINT32_C( 1104) }, + { UINT32_C( 14960), UINT32_C( 1169), UINT32_C( 14367), UINT32_C( 15281) }, + INT8_C( 1), + { UINT32_C( 5315437), UINT32_C( 1304386) } }, + { { UINT32_C( 19257), UINT32_C( 3056) }, + { UINT32_C( 3291), UINT32_C( 3783) }, + { UINT32_C( 7597), UINT32_C( 18771), UINT32_C( 5171), UINT32_C( 7806) }, + INT8_C( 0), + { UINT32_C( 25020984), UINT32_C( 28742507) } }, + { { UINT32_C( 14218), UINT32_C( 15283) }, + { UINT32_C( 17057), UINT32_C( 19454) }, + { UINT32_C( 18483), UINT32_C( 19832), UINT32_C( 11066), UINT32_C( 2085) }, + INT8_C( 3), + { UINT32_C( 35578063), UINT32_C( 40576873) } }, + { { UINT32_C( 19077), UINT32_C( 672) }, + { UINT32_C( 13358), UINT32_C( 7702) }, + { UINT32_C( 305), UINT32_C( 13216), UINT32_C( 15048), UINT32_C( 13974) }, + INT8_C( 0), + { UINT32_C( 4093267), UINT32_C( 2349782) } }, + { { UINT32_C( 12243), UINT32_C( 2054) }, + { UINT32_C( 15731), UINT32_C( 15822) }, + { UINT32_C( 13246), UINT32_C( 4996), UINT32_C( 14773), UINT32_C( 18150) }, + INT8_C( 1), + { UINT32_C( 78604319), UINT32_C( 79048766) } }, + { { UINT32_C( 16357), UINT32_C( 12567) }, + { UINT32_C( 10743), UINT32_C( 126) }, + { UINT32_C( 14907), UINT32_C( 9502), UINT32_C( 2633), UINT32_C( 4175) }, + INT8_C( 2), + { UINT32_C( 28302676), UINT32_C( 344325) } }, + { { UINT32_C( 18517), UINT32_C( 19015) }, + { UINT32_C( 12624), UINT32_C( 4257) }, + { UINT32_C( 17459), UINT32_C( 18980), UINT32_C( 11775), UINT32_C( 17522) }, + INT8_C( 3), + { UINT32_C(221216245), UINT32_C( 74610169) } }, + }; + + simde_uint32x2_t r, a, b; + simde_uint32x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_u32(test_vec[i].a); + b = simde_vld1_u32(test_vec[i].b); + v 
= simde_vld1q_u32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_u32(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_u32(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_u32(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_u32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u32(0); break; + } + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + static int test_simde_vmla_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -900,6 +1279,430 @@ test_simde_vmlaq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +/* Eric: Skip this function since it will trigger a compiler error when using i686-linux-gnu-g++-11. +static int +test_simde_vmlaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[4]; + float b[4]; + float v[4]; + int8_t lane; + float r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 8442.05), SIMDE_FLOAT32_C( -1389.69), SIMDE_FLOAT32_C( -7758.59), SIMDE_FLOAT32_C( 9540.76) }, + { SIMDE_FLOAT32_C( 6942.28), SIMDE_FLOAT32_C( 8460.25), SIMDE_FLOAT32_C( -6145.94), SIMDE_FLOAT32_C( -5237.55) }, + { SIMDE_FLOAT32_C( 6.77), SIMDE_FLOAT32_C( -67.83), SIMDE_FLOAT32_C( 8178.24), SIMDE_FLOAT32_C( 8618.63) }, + INT8_C( 0), + { SIMDE_FLOAT32_C( 55441.29), SIMDE_FLOAT32_C( 55886.20), SIMDE_FLOAT32_C(-49366.60), SIMDE_FLOAT32_C(-25917.45) } }, + { { SIMDE_FLOAT32_C( 5329.09), SIMDE_FLOAT32_C( -8842.70), SIMDE_FLOAT32_C( 1083.15), SIMDE_FLOAT32_C( 2779.22) }, + { SIMDE_FLOAT32_C( -5037.14), SIMDE_FLOAT32_C( -4543.60), SIMDE_FLOAT32_C( -1131.52), SIMDE_FLOAT32_C( 4145.12) }, + { SIMDE_FLOAT32_C( 4928.14), SIMDE_FLOAT32_C( -3195.41), SIMDE_FLOAT32_C( 9422.83), SIMDE_FLOAT32_C( 529.15) }, + INT8_C( 1), + { SIMDE_FLOAT32_C(16101056.62), SIMDE_FLOAT32_C(14509822.18), SIMDE_FLOAT32_C(3616753.47), SIMDE_FLOAT32_C(-13242578.68) } }, + { { SIMDE_FLOAT32_C( 3210.16), SIMDE_FLOAT32_C( -8073.64), SIMDE_FLOAT32_C( -1871.73), SIMDE_FLOAT32_C( 7766.76) }, + { SIMDE_FLOAT32_C( 
8132.39), SIMDE_FLOAT32_C( -6406.75), SIMDE_FLOAT32_C( -1445.99), SIMDE_FLOAT32_C( 9952.74) }, + { SIMDE_FLOAT32_C( -5644.48), SIMDE_FLOAT32_C( 639.33), SIMDE_FLOAT32_C( -1095.59), SIMDE_FLOAT32_C( -658.50) }, + INT8_C( 0), + { SIMDE_FLOAT32_C(-45899902.55), SIMDE_FLOAT32_C(36154698.60), SIMDE_FLOAT32_C(8159989.91), SIMDE_FLOAT32_C(-56170275.12) } }, + { { SIMDE_FLOAT32_C( 9159.77), SIMDE_FLOAT32_C( -4542.00), SIMDE_FLOAT32_C( 9445.80), SIMDE_FLOAT32_C( -2110.99) }, + { SIMDE_FLOAT32_C( -9801.74), SIMDE_FLOAT32_C( 1886.98), SIMDE_FLOAT32_C( 75.78), SIMDE_FLOAT32_C( -3125.09) }, + { SIMDE_FLOAT32_C( 637.73), SIMDE_FLOAT32_C( -6924.14), SIMDE_FLOAT32_C( 4915.85), SIMDE_FLOAT32_C( -4312.70) }, + INT8_C( 2), + { SIMDE_FLOAT32_C(-48174723.81), SIMDE_FLOAT32_C(9271568.63), SIMDE_FLOAT32_C(381968.91), SIMDE_FLOAT32_C(-15364584.67) } }, + { { SIMDE_FLOAT32_C( 8169.70), SIMDE_FLOAT32_C( -7479.28), SIMDE_FLOAT32_C( 3348.22), SIMDE_FLOAT32_C( 9587.11) }, + { SIMDE_FLOAT32_C( 8706.77), SIMDE_FLOAT32_C( 3214.14), SIMDE_FLOAT32_C( 4481.19), SIMDE_FLOAT32_C( 2317.74) }, + { SIMDE_FLOAT32_C( 6486.86), SIMDE_FLOAT32_C( -4651.54), SIMDE_FLOAT32_C( -6489.10), SIMDE_FLOAT32_C( -2517.83) }, + INT8_C( 3), + { SIMDE_FLOAT32_C(-21913997.01), SIMDE_FLOAT32_C(-8100137.40), SIMDE_FLOAT32_C(-11279526.40), SIMDE_FLOAT32_C(-5826088.19) } }, + { { SIMDE_FLOAT32_C( 3469.51), SIMDE_FLOAT32_C( 2478.92), SIMDE_FLOAT32_C( 3545.43), SIMDE_FLOAT32_C( 7751.36) }, + { SIMDE_FLOAT32_C( -6649.73), SIMDE_FLOAT32_C( -7867.38), SIMDE_FLOAT32_C( 615.31), SIMDE_FLOAT32_C( 7194.09) }, + { SIMDE_FLOAT32_C( -94.57), SIMDE_FLOAT32_C( 6945.08), SIMDE_FLOAT32_C( -3326.00), SIMDE_FLOAT32_C( 9571.16) }, + INT8_C( 1), + { SIMDE_FLOAT32_C(-46179437.32), SIMDE_FLOAT32_C(-54637104.57), SIMDE_FLOAT32_C(4276922.60), SIMDE_FLOAT32_C(49971281.94) } }, + { { SIMDE_FLOAT32_C( -114.66), SIMDE_FLOAT32_C( -857.24), SIMDE_FLOAT32_C( 5238.98), SIMDE_FLOAT32_C( 8989.32) }, + { SIMDE_FLOAT32_C( 414.11), SIMDE_FLOAT32_C( -4411.42), 
SIMDE_FLOAT32_C( -3370.65), SIMDE_FLOAT32_C( -285.04) }, + { SIMDE_FLOAT32_C( 9685.87), SIMDE_FLOAT32_C( -5631.83), SIMDE_FLOAT32_C( 6655.15), SIMDE_FLOAT32_C( -1696.69) }, + INT8_C( 2), + { SIMDE_FLOAT32_C(2755849.51), SIMDE_FLOAT32_C(-29359519.05), SIMDE_FLOAT32_C(-22426942.37), SIMDE_FLOAT32_C(-1887994.64) } }, + { { SIMDE_FLOAT32_C( -7938.21), SIMDE_FLOAT32_C( 4723.86), SIMDE_FLOAT32_C( 4094.72), SIMDE_FLOAT32_C( 8367.37) }, + { SIMDE_FLOAT32_C( -2636.63), SIMDE_FLOAT32_C( -3127.64), SIMDE_FLOAT32_C( 8864.91), SIMDE_FLOAT32_C( 2740.01) }, + { SIMDE_FLOAT32_C( -8938.37), SIMDE_FLOAT32_C( -1798.64), SIMDE_FLOAT32_C( -8001.07), SIMDE_FLOAT32_C( 9705.11) }, + INT8_C( 2), + { SIMDE_FLOAT32_C(21087922.98), SIMDE_FLOAT32_C(25029190.43), SIMDE_FLOAT32_C(-70924670.73), SIMDE_FLOAT32_C(-21914644.44) } }, + }; + + simde_float32x4_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_f32(test_vec[i].a); + b = simde_vld1q_f32(test_vec[i].b); + v = simde_vld1q_f32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_f32(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_f32(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_f32(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_f32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_f32(0); break; + } + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +} +*/ + +static int +test_simde_vmlaq_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b[8]; + int16_t v[8]; + int8_t lane; + int16_t r[8]; + } test_vec[] = { + { { INT16_C( 27), -INT16_C( 14), INT16_C( 80), -INT16_C( 68), + -INT16_C( 60), INT16_C( 13), -INT16_C( 19), INT16_C( 58) }, + { -INT16_C( 62), INT16_C( 96), INT16_C( 88), -INT16_C( 24), + -INT16_C( 69), INT16_C( 66), -INT16_C( 11), -INT16_C( 43) }, + { -INT16_C( 5), -INT16_C( 2), -INT16_C( 9), -INT16_C( 77), + -INT16_C( 68), INT16_C( 
65), -INT16_C( 81), INT16_C( 39) }, + INT8_C( 4), + { INT16_C( 4243), -INT16_C( 6542), -INT16_C( 5904), INT16_C( 1564), + INT16_C( 4632), -INT16_C( 4475), INT16_C( 729), INT16_C( 2982) } }, + { { INT16_C( 86), -INT16_C( 77), -INT16_C( 26), -INT16_C( 24), + INT16_C( 84), -INT16_C( 17), -INT16_C( 35), INT16_C( 34) }, + { -INT16_C( 72), -INT16_C( 12), -INT16_C( 48), INT16_C( 50), + INT16_C( 17), INT16_C( 41), -INT16_C( 78), -INT16_C( 73) }, + { INT16_C( 42), INT16_C( 24), INT16_C( 40), -INT16_C( 89), + -INT16_C( 28), INT16_C( 60), -INT16_C( 2), INT16_C( 52) }, + INT8_C( 2), + { -INT16_C( 2794), -INT16_C( 557), -INT16_C( 1946), INT16_C( 1976), + INT16_C( 764), INT16_C( 1623), -INT16_C( 3155), -INT16_C( 2886) } }, + { { INT16_C( 70), INT16_C( 1), INT16_C( 23), -INT16_C( 44), + -INT16_C( 5), INT16_C( 65), -INT16_C( 27), INT16_C( 23) }, + { INT16_C( 92), -INT16_C( 88), -INT16_C( 41), -INT16_C( 89), + -INT16_C( 37), INT16_C( 63), -INT16_C( 44), -INT16_C( 77) }, + { -INT16_C( 74), INT16_C( 29), -INT16_C( 58), -INT16_C( 38), + -INT16_C( 71), INT16_C( 98), INT16_C( 26), INT16_C( 3) }, + INT8_C( 5), + { INT16_C( 9086), -INT16_C( 8623), -INT16_C( 3995), -INT16_C( 8766), + -INT16_C( 3631), INT16_C( 6239), -INT16_C( 4339), -INT16_C( 7523) } }, + { { -INT16_C( 1), -INT16_C( 60), -INT16_C( 20), INT16_C( 28), + INT16_C( 36), -INT16_C( 98), INT16_C( 95), INT16_C( 19) }, + { -INT16_C( 43), -INT16_C( 59), INT16_C( 41), -INT16_C( 12), + -INT16_C( 73), -INT16_C( 34), INT16_C( 76), -INT16_C( 31) }, + { INT16_C( 0), -INT16_C( 92), -INT16_C( 26), -INT16_C( 76), + -INT16_C( 39), INT16_C( 62), INT16_C( 29), INT16_C( 22) }, + INT8_C( 2), + { INT16_C( 1117), INT16_C( 1474), -INT16_C( 1086), INT16_C( 340), + INT16_C( 1934), INT16_C( 786), -INT16_C( 1881), INT16_C( 825) } }, + { { INT16_C( 76), INT16_C( 78), -INT16_C( 52), INT16_C( 59), + INT16_C( 75), -INT16_C( 12), INT16_C( 26), -INT16_C( 48) }, + { -INT16_C( 7), INT16_C( 21), INT16_C( 1), -INT16_C( 9), + INT16_C( 71), INT16_C( 57), -INT16_C( 
36), -INT16_C( 12) }, + { -INT16_C( 23), INT16_C( 6), -INT16_C( 38), INT16_C( 79), + -INT16_C( 62), -INT16_C( 58), -INT16_C( 3), -INT16_C( 5) }, + INT8_C( 7), + { INT16_C( 111), -INT16_C( 27), -INT16_C( 57), INT16_C( 104), + -INT16_C( 280), -INT16_C( 297), INT16_C( 206), INT16_C( 12) } }, + { { INT16_C( 3), -INT16_C( 47), -INT16_C( 89), INT16_C( 58), + -INT16_C( 22), INT16_C( 26), -INT16_C( 49), INT16_C( 6) }, + { INT16_C( 72), INT16_C( 78), -INT16_C( 54), INT16_C( 0), + INT16_C( 12), -INT16_C( 22), -INT16_C( 87), INT16_C( 4) }, + { -INT16_C( 89), -INT16_C( 84), INT16_C( 65), INT16_C( 20), + -INT16_C( 27), -INT16_C( 19), INT16_C( 47), -INT16_C( 12) }, + INT8_C( 6), + { INT16_C( 3387), INT16_C( 3619), -INT16_C( 2627), INT16_C( 58), + INT16_C( 542), -INT16_C( 1008), -INT16_C( 4138), INT16_C( 194) } }, + { { INT16_C( 97), -INT16_C( 3), -INT16_C( 86), INT16_C( 62), + -INT16_C( 5), -INT16_C( 50), -INT16_C( 91), INT16_C( 81) }, + { INT16_C( 47), INT16_C( 49), INT16_C( 4), INT16_C( 4), + -INT16_C( 40), INT16_C( 55), -INT16_C( 34), INT16_C( 85) }, + { INT16_C( 56), INT16_C( 35), -INT16_C( 72), INT16_C( 64), + INT16_C( 52), INT16_C( 80), INT16_C( 12), INT16_C( 48) }, + INT8_C( 4), + { INT16_C( 2541), INT16_C( 2545), INT16_C( 122), INT16_C( 270), + -INT16_C( 2085), INT16_C( 2810), -INT16_C( 1859), INT16_C( 4501) } }, + { { INT16_C( 10), INT16_C( 42), -INT16_C( 53), -INT16_C( 66), + INT16_C( 95), -INT16_C( 34), -INT16_C( 15), -INT16_C( 84) }, + { -INT16_C( 19), INT16_C( 87), -INT16_C( 77), -INT16_C( 65), + -INT16_C( 80), INT16_C( 4), INT16_C( 74), INT16_C( 63) }, + { -INT16_C( 70), -INT16_C( 67), INT16_C( 63), -INT16_C( 62), + -INT16_C( 20), INT16_C( 44), INT16_C( 15), INT16_C( 79) }, + INT8_C( 7), + { -INT16_C( 1491), INT16_C( 6915), -INT16_C( 6136), -INT16_C( 5201), + -INT16_C( 6225), INT16_C( 282), INT16_C( 5831), INT16_C( 4893) } }, + }; + + simde_int16x8_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = 
simde_vld1q_s16(test_vec[i].a); + b = simde_vld1q_s16(test_vec[i].b); + v = simde_vld1q_s16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vmlaq_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vmlaq_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vmlaq_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vmlaq_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s16(0); break; + } + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlaq_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t b[4]; + int32_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 6136), INT32_C( 1298), -INT32_C( 4397), -INT32_C( 2313) }, + { -INT32_C( 2459), INT32_C( 5639), INT32_C( 944), INT32_C( 7600) }, + { -INT32_C( 5378), INT32_C( 8852), -INT32_C( 9600), INT32_C( 9935) }, + INT8_C( 1), + { -INT32_C(21773204), INT32_C(49917726), INT32_C( 8351891), INT32_C(67272887) } }, + { { -INT32_C( 9404), INT32_C( 567), -INT32_C( 6346), -INT32_C( 3906) }, + { INT32_C( 6125), INT32_C( 4990), INT32_C( 5704), INT32_C( 3511) }, + { INT32_C( 7787), INT32_C( 7196), INT32_C( 1911), INT32_C( 9234) }, + INT8_C( 3), + { INT32_C(56548846), INT32_C(46078227), INT32_C(52664390), INT32_C(32416668) } }, + { { INT32_C( 124), INT32_C( 1315), -INT32_C( 398), -INT32_C( 8335) }, + { INT32_C( 4856), -INT32_C( 218), INT32_C( 9898), INT32_C( 5450) }, + { INT32_C( 3364), -INT32_C( 8289), -INT32_C( 5348), -INT32_C( 4660) }, + INT8_C( 2), + { -INT32_C(25969764), INT32_C( 1167179), -INT32_C(52934902), -INT32_C(29154935) } }, + { { -INT32_C( 7741), -INT32_C( 5638), -INT32_C( 1216), -INT32_C( 4399) }, + { -INT32_C( 7004), -INT32_C( 
7954), -INT32_C( 5842), -INT32_C( 4473) }, + { INT32_C( 4738), -INT32_C( 4541), INT32_C( 3190), INT32_C( 5887) }, + INT8_C( 3), + { -INT32_C(41240289), -INT32_C(46830836), -INT32_C(34393070), -INT32_C(26336950) } }, + { { -INT32_C( 4228), INT32_C( 9643), -INT32_C( 6720), INT32_C( 2725) }, + { INT32_C( 8556), -INT32_C( 5884), -INT32_C( 3313), INT32_C( 3496) }, + { -INT32_C( 9409), -INT32_C( 9792), INT32_C( 284), -INT32_C( 1614) }, + INT8_C( 0), + { -INT32_C(80507632), INT32_C(55372199), INT32_C(31165297), -INT32_C(32891139) } }, + { { INT32_C( 1341), -INT32_C( 617), -INT32_C( 8745), -INT32_C( 4860) }, + { INT32_C( 4084), -INT32_C( 5463), -INT32_C( 1722), -INT32_C( 9232) }, + { INT32_C( 5013), -INT32_C( 9923), INT32_C( 4345), -INT32_C( 7885) }, + INT8_C( 2), + { INT32_C(17746321), -INT32_C(23737352), -INT32_C( 7490835), -INT32_C(40117900) } }, + { { -INT32_C( 4259), -INT32_C( 3605), -INT32_C( 5933), -INT32_C( 2164) }, + { INT32_C( 4526), INT32_C( 5427), -INT32_C( 8312), INT32_C( 6713) }, + { INT32_C( 108), INT32_C( 2835), INT32_C( 3642), -INT32_C( 9015) }, + INT8_C( 3), + { -INT32_C(40806149), -INT32_C(48928010), INT32_C(74926747), -INT32_C(60519859) } }, + { { INT32_C( 8687), INT32_C( 3316), INT32_C( 4039), INT32_C( 4869) }, + { INT32_C( 1050), INT32_C( 4513), -INT32_C( 1617), -INT32_C( 4470) }, + { INT32_C( 1370), -INT32_C( 1457), -INT32_C( 7408), -INT32_C( 7365) }, + INT8_C( 3), + { -INT32_C( 7724563), -INT32_C(33234929), INT32_C(11913244), INT32_C(32926419) } }, + }; + + simde_int32x4_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_s32(test_vec[i].a); + b = simde_vld1q_s32(test_vec[i].b); + v = simde_vld1q_s32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_s32(a, b, v, 3); break; + default: 
HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlaq_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint16_t v[8]; + int8_t lane; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 34), UINT16_C( 55), UINT16_C( 53), UINT16_C( 65), + UINT16_C( 8), UINT16_C( 110), UINT16_C( 158), UINT16_C( 151) }, + { UINT16_C( 173), UINT16_C( 56), UINT16_C( 100), UINT16_C( 187), + UINT16_C( 53), UINT16_C( 174), UINT16_C( 100), UINT16_C( 199) }, + { UINT16_C( 143), UINT16_C( 106), UINT16_C( 85), UINT16_C( 161), + UINT16_C( 154), UINT16_C( 119), UINT16_C( 79), UINT16_C( 80) }, + INT8_C( 1), + { UINT16_C( 18372), UINT16_C( 5991), UINT16_C( 10653), UINT16_C( 19887), + UINT16_C( 5626), UINT16_C( 18554), UINT16_C( 10758), UINT16_C( 21245) } }, + { { UINT16_C( 60), UINT16_C( 195), UINT16_C( 16), UINT16_C( 37), + UINT16_C( 149), UINT16_C( 107), UINT16_C( 153), UINT16_C( 113) }, + { UINT16_C( 164), UINT16_C( 111), UINT16_C( 177), UINT16_C( 35), + UINT16_C( 15), UINT16_C( 27), UINT16_C( 192), UINT16_C( 69) }, + { UINT16_C( 35), UINT16_C( 136), UINT16_C( 42), UINT16_C( 13), + UINT16_C( 121), UINT16_C( 151), UINT16_C( 152), UINT16_C( 185) }, + INT8_C( 2), + { UINT16_C( 6948), UINT16_C( 4857), UINT16_C( 7450), UINT16_C( 1507), + UINT16_C( 779), UINT16_C( 1241), UINT16_C( 8217), UINT16_C( 3011) } }, + { { UINT16_C( 76), UINT16_C( 110), UINT16_C( 183), UINT16_C( 176), + UINT16_C( 10), UINT16_C( 39), UINT16_C( 127), UINT16_C( 64) }, + { UINT16_C( 71), UINT16_C( 190), UINT16_C( 18), UINT16_C( 165), + UINT16_C( 189), UINT16_C( 156), UINT16_C( 101), UINT16_C( 164) }, + { UINT16_C( 19), UINT16_C( 88), UINT16_C( 194), UINT16_C( 90), + UINT16_C( 177), UINT16_C( 34), UINT16_C( 165), UINT16_C( 63) }, + INT8_C( 3), + { UINT16_C( 6466), UINT16_C( 17210), UINT16_C( 1803), UINT16_C( 15026), + UINT16_C( 17020), UINT16_C( 14079), 
UINT16_C( 9217), UINT16_C( 14824) } }, + { { UINT16_C( 192), UINT16_C( 2), UINT16_C( 111), UINT16_C( 133), + UINT16_C( 8), UINT16_C( 69), UINT16_C( 147), UINT16_C( 41) }, + { UINT16_C( 7), UINT16_C( 183), UINT16_C( 162), UINT16_C( 21), + UINT16_C( 61), UINT16_C( 125), UINT16_C( 33), UINT16_C( 127) }, + { UINT16_C( 29), UINT16_C( 195), UINT16_C( 86), UINT16_C( 0), + UINT16_C( 59), UINT16_C( 89), UINT16_C( 99), UINT16_C( 9) }, + INT8_C( 0), + { UINT16_C( 395), UINT16_C( 5309), UINT16_C( 4809), UINT16_C( 742), + UINT16_C( 1777), UINT16_C( 3694), UINT16_C( 1104), UINT16_C( 3724) } }, + { { UINT16_C( 95), UINT16_C( 15), UINT16_C( 0), UINT16_C( 138), + UINT16_C( 174), UINT16_C( 104), UINT16_C( 186), UINT16_C( 105) }, + { UINT16_C( 66), UINT16_C( 149), UINT16_C( 50), UINT16_C( 79), + UINT16_C( 177), UINT16_C( 169), UINT16_C( 40), UINT16_C( 125) }, + { UINT16_C( 67), UINT16_C( 103), UINT16_C( 80), UINT16_C( 199), + UINT16_C( 172), UINT16_C( 94), UINT16_C( 82), UINT16_C( 142) }, + INT8_C( 1), + { UINT16_C( 6893), UINT16_C( 15362), UINT16_C( 5150), UINT16_C( 8275), + UINT16_C( 18405), UINT16_C( 17511), UINT16_C( 4306), UINT16_C( 12980) } }, + { { UINT16_C( 123), UINT16_C( 187), UINT16_C( 45), UINT16_C( 103), + UINT16_C( 65), UINT16_C( 124), UINT16_C( 98), UINT16_C( 75) }, + { UINT16_C( 169), UINT16_C( 102), UINT16_C( 113), UINT16_C( 164), + UINT16_C( 59), UINT16_C( 171), UINT16_C( 72), UINT16_C( 38) }, + { UINT16_C( 174), UINT16_C( 143), UINT16_C( 134), UINT16_C( 24), + UINT16_C( 81), UINT16_C( 150), UINT16_C( 186), UINT16_C( 119) }, + INT8_C( 0), + { UINT16_C( 29529), UINT16_C( 17935), UINT16_C( 19707), UINT16_C( 28639), + UINT16_C( 10331), UINT16_C( 29878), UINT16_C( 12626), UINT16_C( 6687) } }, + { { UINT16_C( 185), UINT16_C( 79), UINT16_C( 109), UINT16_C( 44), + UINT16_C( 147), UINT16_C( 9), UINT16_C( 91), UINT16_C( 66) }, + { UINT16_C( 28), UINT16_C( 160), UINT16_C( 34), UINT16_C( 60), + UINT16_C( 182), UINT16_C( 5), UINT16_C( 85), UINT16_C( 164) }, + { UINT16_C( 132), 
UINT16_C( 144), UINT16_C( 59), UINT16_C( 73), + UINT16_C( 4), UINT16_C( 50), UINT16_C( 128), UINT16_C( 152) }, + INT8_C( 0), + { UINT16_C( 3881), UINT16_C( 21199), UINT16_C( 4597), UINT16_C( 7964), + UINT16_C( 24171), UINT16_C( 669), UINT16_C( 11311), UINT16_C( 21714) } }, + { { UINT16_C( 46), UINT16_C( 41), UINT16_C( 184), UINT16_C( 128), + UINT16_C( 76), UINT16_C( 168), UINT16_C( 192), UINT16_C( 166) }, + { UINT16_C( 10), UINT16_C( 186), UINT16_C( 84), UINT16_C( 64), + UINT16_C( 186), UINT16_C( 53), UINT16_C( 70), UINT16_C( 100) }, + { UINT16_C( 89), UINT16_C( 87), UINT16_C( 23), UINT16_C( 67), + UINT16_C( 27), UINT16_C( 149), UINT16_C( 191), UINT16_C( 113) }, + INT8_C( 0), + { UINT16_C( 936), UINT16_C( 16595), UINT16_C( 7660), UINT16_C( 5824), + UINT16_C( 16630), UINT16_C( 4885), UINT16_C( 6422), UINT16_C( 9066) } }, + }; + + simde_uint16x8_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_u16(test_vec[i].a); + b = simde_vld1q_u16(test_vec[i].b); + v = simde_vld1q_u16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_u16(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_u16(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_u16(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_u16(a, b, v, 3); break; + case 4: r = simde_vmlaq_laneq_u16(a, b, v, 4); break; + case 5: r = simde_vmlaq_laneq_u16(a, b, v, 5); break; + case 6: r = simde_vmlaq_laneq_u16(a, b, v, 6); break; + case 7: r = simde_vmlaq_laneq_u16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u16(0); break; + } + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlaq_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint32_t v[4]; + int8_t lane; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 17966), UINT32_C( 9668), UINT32_C( 3899), UINT32_C( 1740) }, + { UINT32_C( 9644), 
UINT32_C( 8215), UINT32_C( 11766), UINT32_C( 5182) }, + { UINT32_C( 8131), UINT32_C( 6197), UINT32_C( 18142), UINT32_C( 5160) }, + INT8_C( 1), + { UINT32_C( 59781834), UINT32_C( 50918023), UINT32_C( 72917801), UINT32_C( 32114594) } }, + { { UINT32_C( 14022), UINT32_C( 5194), UINT32_C( 11502), UINT32_C( 5635) }, + { UINT32_C( 877), UINT32_C( 10991), UINT32_C( 5463), UINT32_C( 659) }, + { UINT32_C( 5178), UINT32_C( 1188), UINT32_C( 9464), UINT32_C( 1436) }, + INT8_C( 1), + { UINT32_C( 1055898), UINT32_C( 13062502), UINT32_C( 6501546), UINT32_C( 788527) } }, + { { UINT32_C( 18302), UINT32_C( 8019), UINT32_C( 7939), UINT32_C( 15138) }, + { UINT32_C( 13816), UINT32_C( 3641), UINT32_C( 16263), UINT32_C( 7916) }, + { UINT32_C( 3757), UINT32_C( 13705), UINT32_C( 13542), UINT32_C( 4003) }, + INT8_C( 0), + { UINT32_C( 51925014), UINT32_C( 13687256), UINT32_C( 61108030), UINT32_C( 29755550) } }, + { { UINT32_C( 19118), UINT32_C( 5278), UINT32_C( 12843), UINT32_C( 9921) }, + { UINT32_C( 14624), UINT32_C( 9994), UINT32_C( 2130), UINT32_C( 2980) }, + { UINT32_C( 11531), UINT32_C( 2465), UINT32_C( 7359), UINT32_C( 14489) }, + INT8_C( 2), + { UINT32_C( 107637134), UINT32_C( 73551124), UINT32_C( 15687513), UINT32_C( 21939741) } }, + { { UINT32_C( 6232), UINT32_C( 4537), UINT32_C( 1814), UINT32_C( 3971) }, + { UINT32_C( 12752), UINT32_C( 14106), UINT32_C( 17693), UINT32_C( 1938) }, + { UINT32_C( 44), UINT32_C( 178), UINT32_C( 1791), UINT32_C( 12350) }, + INT8_C( 3), + { UINT32_C( 157493432), UINT32_C( 174213637), UINT32_C( 218510364), UINT32_C( 23938271) } }, + { { UINT32_C( 8462), UINT32_C( 4455), UINT32_C( 11958), UINT32_C( 11866) }, + { UINT32_C( 5206), UINT32_C( 11522), UINT32_C( 9962), UINT32_C( 11203) }, + { UINT32_C( 9268), UINT32_C( 17392), UINT32_C( 13804), UINT32_C( 10799) }, + INT8_C( 2), + { UINT32_C( 71872086), UINT32_C( 159054143), UINT32_C( 137527406), UINT32_C( 154658078) } }, + { { UINT32_C( 15528), UINT32_C( 7021), UINT32_C( 16870), UINT32_C( 13050) }, + { 
UINT32_C( 10135), UINT32_C( 12640), UINT32_C( 13287), UINT32_C( 19824) }, + { UINT32_C( 19598), UINT32_C( 1381), UINT32_C( 6284), UINT32_C( 6134) }, + INT8_C( 3), + { UINT32_C( 62183618), UINT32_C( 77540781), UINT32_C( 81519328), UINT32_C( 121613466) } }, + { { UINT32_C( 10824), UINT32_C( 10467), UINT32_C( 17351), UINT32_C( 9295) }, + { UINT32_C( 19858), UINT32_C( 16339), UINT32_C( 10310), UINT32_C( 9300) }, + { UINT32_C( 13406), UINT32_C( 3924), UINT32_C( 16158), UINT32_C( 10460) }, + INT8_C( 2), + { UINT32_C( 320876388), UINT32_C( 264016029), UINT32_C( 166606331), UINT32_C( 150278695) } }, + }; + + simde_uint32x4_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_u32(test_vec[i].a); + b = simde_vld1q_u32(test_vec[i].b); + v = simde_vld1q_u32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_u32(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_u32(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_u32(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_u32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u32(0); break; + } + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmla_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_lane_s16) @@ -907,11 +1710,23 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vmla_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmla_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmla_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmla_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmla_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmla_laneq_u32) + SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_lane_u32) + +//SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_laneq_f32) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_laneq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/mlal_high_lane.c b/test/arm/neon/mlal_high_lane.c new file mode 100644 index 000000000..3a8dcfb60 --- /dev/null +++ b/test/arm/neon/mlal_high_lane.c @@ -0,0 +1,579 @@ +#define SIMDE_TEST_ARM_NEON_INSN mlal_high_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/mlal_high_lane.h" + +static int +test_simde_vmlal_high_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 4406058), INT32_C( 8594762), -INT32_C( 4438344), INT32_C( 4284051) }, + { -INT16_C( 7569), -INT16_C( 2636), INT16_C( 9703), -INT16_C( 6205), + -INT16_C( 1133), -INT16_C( 6695), INT16_C( 49), -INT16_C( 3446) }, + { INT16_C( 8747), INT16_C( 1548), -INT16_C( 2880), -INT16_C( 4509), + -INT16_C( 1216), INT16_C( 5189), -INT16_C( 7113), INT16_C( 8336) }, + INT8_C( 5), + { -INT32_C( 10285195), -INT32_C( 26145593), -INT32_C( 4184083), -INT32_C( 13597243) } }, + { { -INT32_C( 9830202), INT32_C( 3121497), -INT32_C( 7759855), -INT32_C( 820350) }, + { -INT16_C( 6887), INT16_C( 9051), INT16_C( 1315), INT16_C( 6723), + -INT16_C( 6245), INT16_C( 2962), -INT16_C( 2156), -INT16_C( 9817) }, + { INT16_C( 9129), INT16_C( 5065), -INT16_C( 905), -INT16_C( 1121), + -INT16_C( 2), INT16_C( 4827), -INT16_C( 8602), -INT16_C( 5206) }, + INT8_C( 0), + { -INT32_C( 66840807), INT32_C( 30161595), -INT32_C( 27441979), -INT32_C( 90439743) } }, + { { INT32_C( 8595222), -INT32_C( 8524727), INT32_C( 4187766), -INT32_C( 9062933) }, + { -INT16_C( 2343), INT16_C( 2612), -INT16_C( 6106), -INT16_C( 4572), + INT16_C( 5179), -INT16_C( 9713), INT16_C( 9329), INT16_C( 4591) }, + { INT16_C( 6566), -INT16_C( 1645), -INT16_C( 3500), INT16_C( 5115), + INT16_C( 
3191), -INT16_C( 2940), INT16_C( 7754), INT16_C( 199) }, + INT8_C( 4), + { INT32_C( 25121411), -INT32_C( 39518910), INT32_C( 33956605), INT32_C( 5586948) } }, + { { INT32_C( 8303620), -INT32_C( 2244672), -INT32_C( 7623952), INT32_C( 4498971) }, + { -INT16_C( 1679), INT16_C( 6998), -INT16_C( 7210), INT16_C( 759), + INT16_C( 732), INT16_C( 406), INT16_C( 6834), -INT16_C( 9679) }, + { -INT16_C( 172), -INT16_C( 9945), INT16_C( 9734), -INT16_C( 5552), + INT16_C( 8167), -INT16_C( 1116), -INT16_C( 9401), -INT16_C( 2755) }, + INT8_C( 5), + { INT32_C( 7486708), -INT32_C( 2697768), -INT32_C( 15250696), INT32_C( 15300735) } }, + { { INT32_C( 3026258), -INT32_C( 7909011), -INT32_C( 2204046), INT32_C( 540470) }, + { INT16_C( 3461), -INT16_C( 8968), INT16_C( 3685), -INT16_C( 1789), + INT16_C( 8647), -INT16_C( 3918), INT16_C( 521), INT16_C( 9282) }, + { INT16_C( 2143), -INT16_C( 4425), -INT16_C( 3341), INT16_C( 3974), + INT16_C( 5110), -INT16_C( 8757), INT16_C( 7097), -INT16_C( 7726) }, + INT8_C( 7), + { -INT32_C( 63780464), INT32_C( 22361457), -INT32_C( 6229292), -INT32_C( 71172262) } }, + { { -INT32_C( 8224939), -INT32_C( 8215918), -INT32_C( 5663906), -INT32_C( 5459868) }, + { -INT16_C( 8566), -INT16_C( 3359), INT16_C( 7584), -INT16_C( 8202), + INT16_C( 3417), INT16_C( 8358), INT16_C( 4162), -INT16_C( 545) }, + { -INT16_C( 3264), INT16_C( 7765), -INT16_C( 7330), -INT16_C( 9670), + -INT16_C( 4342), INT16_C( 3531), -INT16_C( 2217), -INT16_C( 9483) }, + INT8_C( 3), + { -INT32_C( 41267329), -INT32_C( 89037778), -INT32_C( 45910446), -INT32_C( 189718) } }, + { { -INT32_C( 6900876), INT32_C( 1989038), INT32_C( 5250510), INT32_C( 2462603) }, + { INT16_C( 8455), INT16_C( 619), -INT16_C( 8256), -INT16_C( 9681), + -INT16_C( 2879), -INT16_C( 1635), -INT16_C( 7379), -INT16_C( 3183) }, + { INT16_C( 6548), -INT16_C( 5649), -INT16_C( 1756), -INT16_C( 1987), + -INT16_C( 9816), INT16_C( 143), INT16_C( 5426), -INT16_C( 8359) }, + INT8_C( 6), + { -INT32_C( 22522330), -INT32_C( 6882472), -INT32_C( 
34787944), -INT32_C( 14808355) } }, + { { -INT32_C( 2631663), -INT32_C( 5946993), INT32_C( 9903067), INT32_C( 1768734) }, + { -INT16_C( 8581), INT16_C( 4016), INT16_C( 6351), INT16_C( 8737), + -INT16_C( 114), -INT16_C( 3913), INT16_C( 3836), INT16_C( 647) }, + { INT16_C( 5939), INT16_C( 68), -INT16_C( 8203), INT16_C( 4957), + INT16_C( 113), INT16_C( 51), INT16_C( 4941), INT16_C( 6129) }, + INT8_C( 1), + { -INT32_C( 2639415), -INT32_C( 6213077), INT32_C( 10163915), INT32_C( 1812730) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_8_(simde_vmlal_high_laneq_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlal_high_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 92491), -INT32_C( 4991423), -INT32_C( 6962261), INT32_C( 754720) }, + { INT16_C( 2235), -INT16_C( 4935), INT16_C( 7385), -INT16_C( 5463), + -INT16_C( 3474), -INT16_C( 4909), -INT16_C( 640), -INT16_C( 313) }, + { INT16_C( 5168), -INT16_C( 6389), INT16_C( 8233), INT16_C( 6391) }, + INT8_C( 3), + { -INT32_C( 22294825), -INT32_C( 36364842), -INT32_C( 11052501), -INT32_C( 1245663) } }, + { { INT32_C( 9674847), -INT32_C( 1278400), -INT32_C( 5987353), INT32_C( 7527095) }, + { -INT16_C( 1296), -INT16_C( 5541), INT16_C( 6340), -INT16_C( 4916), + -INT16_C( 1132), INT16_C( 8128), INT16_C( 8305), INT16_C( 9032) }, + { -INT16_C( 1737), INT16_C( 3875), INT16_C( 7202), -INT16_C( 7464) }, + INT8_C( 2), + { INT32_C( 1522183), INT32_C( 57259456), INT32_C( 53825257), INT32_C( 72575559) } }, + { { -INT32_C( 2804130), -INT32_C( 
3910477), INT32_C( 4740399), INT32_C( 2398754) }, + { INT16_C( 3552), INT16_C( 6585), -INT16_C( 1488), -INT16_C( 1378), + -INT16_C( 6480), INT16_C( 3723), -INT16_C( 650), INT16_C( 9068) }, + { -INT16_C( 2168), -INT16_C( 3240), -INT16_C( 3091), INT16_C( 4709) }, + INT8_C( 0), + { INT32_C( 11244510), -INT32_C( 11981941), INT32_C( 6149599), -INT32_C( 17260670) } }, + { { -INT32_C( 9721994), INT32_C( 480786), INT32_C( 3633340), INT32_C( 4812745) }, + { INT16_C( 6156), -INT16_C( 3750), -INT16_C( 6058), -INT16_C( 5432), + INT16_C( 6996), INT16_C( 4382), INT16_C( 5315), -INT16_C( 8048) }, + { -INT16_C( 5881), -INT16_C( 4564), -INT16_C( 7120), INT16_C( 3603) }, + INT8_C( 3), + { INT32_C( 15484594), INT32_C( 16269132), INT32_C( 22783285), -INT32_C( 24184199) } }, + { { -INT32_C( 6952809), INT32_C( 7957212), INT32_C( 9452683), INT32_C( 1765422) }, + { INT16_C( 2811), INT16_C( 2833), -INT16_C( 3912), INT16_C( 722), + -INT16_C( 1822), INT16_C( 898), INT16_C( 2517), INT16_C( 604) }, + { -INT16_C( 2211), INT16_C( 7933), -INT16_C( 4330), INT16_C( 1609) }, + INT8_C( 1), + { -INT32_C( 21406735), INT32_C( 15081046), INT32_C( 29420044), INT32_C( 6556954) } }, + { { INT32_C( 7386160), INT32_C( 8890562), -INT32_C( 5677039), -INT32_C( 1223802) }, + { INT16_C( 8986), INT16_C( 4235), -INT16_C( 8777), -INT16_C( 4965), + INT16_C( 9251), -INT16_C( 6676), INT16_C( 2104), INT16_C( 4790) }, + { -INT16_C( 9040), INT16_C( 9813), INT16_C( 1608), INT16_C( 4624) }, + INT8_C( 0), + { -INT32_C( 76242880), INT32_C( 69241602), -INT32_C( 24697199), -INT32_C( 44525402) } }, + { { -INT32_C( 1883198), INT32_C( 8275243), INT32_C( 6306150), -INT32_C( 9752993) }, + { INT16_C( 5221), INT16_C( 4046), INT16_C( 1025), -INT16_C( 2123), + -INT16_C( 9462), -INT16_C( 1979), INT16_C( 657), INT16_C( 3911) }, + { INT16_C( 9269), INT16_C( 7938), INT16_C( 210), -INT16_C( 2149) }, + INT8_C( 2), + { -INT32_C( 3870218), INT32_C( 7859653), INT32_C( 6444120), -INT32_C( 8931683) } }, + { { INT32_C( 579073), INT32_C( 1895524), 
-INT32_C( 7083507), -INT32_C( 6692075) }, + { INT16_C( 7032), -INT16_C( 3476), INT16_C( 9049), INT16_C( 6246), + -INT16_C( 5463), -INT16_C( 2974), -INT16_C( 5445), -INT16_C( 1393) }, + { -INT16_C( 1115), -INT16_C( 9184), INT16_C( 120), INT16_C( 8630) }, + INT8_C( 0), + { INT32_C( 6670318), INT32_C( 5211534), -INT32_C( 1012332), -INT32_C( 5138880) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_4_(simde_vmlal_high_lane_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlal_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 715265200), INT64_C( 269714938) }, + { INT32_C( 88248), -INT32_C( 502078), -INT32_C( 726867), -INT32_C( 591805) }, + { INT32_C( 779565), -INT32_C( 878186), INT32_C( 520407), INT32_C( 469393) }, + INT8_C( 3), + { -INT64_C( 341901546931), -INT64_C( 277519409427) } }, + { { -INT64_C( 142528748), -INT64_C( 845468102) }, + { -INT32_C( 181994), -INT32_C( 311631), INT32_C( 834928), -INT32_C( 325945) }, + { INT32_C( 18645), INT32_C( 5351), INT32_C( 653713), INT32_C( 553944) }, + INT8_C( 3), + { INT64_C( 462360827284), -INT64_C( 181400745182) } }, + { { -INT64_C( 72197620), INT64_C( 416280002) }, + { INT32_C( 395131), INT32_C( 664381), INT32_C( 976748), INT32_C( 960679) }, + { -INT32_C( 80858), -INT32_C( 9524), -INT32_C( 360067), INT32_C( 828252) }, + INT8_C( 2), + { -INT64_C( 351766919736), -INT64_C( 345492525491) } }, + { { INT64_C( 245795381), -INT64_C( 416427705) }, + { -INT32_C( 675063), -INT32_C( 982458), INT32_C( 887762), INT32_C( 931604) }, + { 
INT32_C( 426164), -INT32_C( 870540), INT32_C( 980771), -INT32_C( 314064) }, + INT8_C( 3), + { -INT64_C( 278568289387), -INT64_C( 292999706361) } }, + { { INT64_C( 914950112), -INT64_C( 152036963) }, + { INT32_C( 408930), -INT32_C( 713592), INT32_C( 345846), INT32_C( 662237) }, + { -INT32_C( 639618), INT32_C( 819242), INT32_C( 926073), INT32_C( 790077) }, + INT8_C( 1), + { INT64_C( 284246518844), INT64_C( 542380327391) } }, + { { -INT64_C( 742448719), -INT64_C( 447505816) }, + { -INT32_C( 101306), INT32_C( 346447), -INT32_C( 441632), -INT32_C( 308497) }, + { -INT32_C( 153682), -INT32_C( 464613), INT32_C( 511092), -INT32_C( 42392) }, + INT8_C( 2), + { -INT64_C( 226457030863), -INT64_C( 158117854540) } }, + { { -INT64_C( 107186426), -INT64_C( 456887033) }, + { INT32_C( 57885), -INT32_C( 789527), INT32_C( 518707), -INT32_C( 658479) }, + { INT32_C( 56961), -INT32_C( 434997), INT32_C( 466543), INT32_C( 436941) }, + INT8_C( 3), + { INT64_C( 226537168861), -INT64_C( 288173359772) } }, + { { INT64_C( 545535846), -INT64_C( 812072135) }, + { -INT32_C( 715200), INT32_C( 98996), -INT32_C( 913076), -INT32_C( 363614) }, + { -INT32_C( 869174), INT32_C( 915053), INT32_C( 12643), -INT32_C( 261674) }, + INT8_C( 3), + { INT64_C( 239473785070), INT64_C( 94336257701) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_4_(simde_vmlal_high_laneq_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlal_high_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 487680880), -INT64_C( 964134280) }, + { 
INT32_C( 185396), -INT32_C( 962680), INT32_C( 319975), -INT32_C( 63278) }, + { -INT32_C( 894626), -INT32_C( 380723) }, + INT8_C( 1), + { -INT64_C( 121334161045), INT64_C( 23127255714) } }, + { { INT64_C( 144727650), INT64_C( 171539) }, + { INT32_C( 151644), INT32_C( 214255), INT32_C( 651624), -INT32_C( 42872) }, + { INT32_C( 968940), INT32_C( 111669) }, + INT8_C( 1), + { INT64_C( 72910928106), -INT64_C( 4787301829) } }, + { { INT64_C( 86946010), -INT64_C( 102415414) }, + { INT32_C( 31597), INT32_C( 608905), INT32_C( 101866), -INT32_C( 868125) }, + { -INT32_C( 584715), -INT32_C( 814147) }, + INT8_C( 0), + { -INT64_C( 59475632180), INT64_C( 507503293961) } }, + { { INT64_C( 667044459), INT64_C( 167323826) }, + { -INT32_C( 539162), INT32_C( 570958), -INT32_C( 563508), -INT32_C( 609980) }, + { -INT32_C( 614493), -INT32_C( 579962) }, + INT8_C( 0), + { INT64_C( 346938765903), INT64_C( 374995763966) } }, + { { INT64_C( 212290540), -INT64_C( 680785928) }, + { INT32_C( 559758), INT32_C( 758651), -INT32_C( 218804), INT32_C( 283401) }, + { -INT32_C( 277799), INT32_C( 325444) }, + INT8_C( 1), + { -INT64_C( 70996158436), INT64_C( 91550369116) } }, + { { INT64_C( 48632956), INT64_C( 729181621) }, + { -INT32_C( 184632), INT32_C( 5268), -INT32_C( 663102), -INT32_C( 604470) }, + { INT32_C( 367518), INT32_C( 919660) }, + INT8_C( 0), + { -INT64_C( 243653287880), -INT64_C( 221424423839) } }, + { { INT64_C( 40792544), INT64_C( 525694445) }, + { INT32_C( 330695), -INT32_C( 120051), INT32_C( 418734), INT32_C( 809017) }, + { -INT32_C( 681949), INT32_C( 797501) }, + INT8_C( 0), + { -INT64_C( 285514440022), -INT64_C( 551182639688) } }, + { { -INT64_C( 108331267), -INT64_C( 157859080) }, + { -INT32_C( 729463), -INT32_C( 365438), -INT32_C( 115001), -INT32_C( 131517) }, + { INT32_C( 83846), -INT32_C( 203448) }, + INT8_C( 0), + { -INT64_C( 9750705113), -INT64_C( 11185033462) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = 
simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_2_(simde_vmlal_high_lane_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlal_high_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t b[8]; + uint16_t v[8]; + int8_t lane; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 823552), UINT32_C( 1583715), UINT32_C( 125243), UINT32_C( 841409) }, + { UINT16_C( 11516), UINT16_C( 16924), UINT16_C( 12629), UINT16_C( 12120), + UINT16_C( 3697), UINT16_C( 10848), UINT16_C( 13480), UINT16_C( 5328) }, + { UINT16_C( 16203), UINT16_C( 152), UINT16_C( 6471), UINT16_C( 9438), + UINT16_C( 6558), UINT16_C( 3885), UINT16_C( 13828), UINT16_C( 17373) }, + INT8_C( 2), + { UINT32_C( 24746839), UINT32_C( 71781123), UINT32_C( 87354323), UINT32_C( 35318897) } }, + { { UINT32_C( 1149210), UINT32_C( 870807), UINT32_C( 941770), UINT32_C( 538856) }, + { UINT16_C( 5305), UINT16_C( 2956), UINT16_C( 7122), UINT16_C( 11350), + UINT16_C( 9487), UINT16_C( 3690), UINT16_C( 17134), UINT16_C( 220) }, + { UINT16_C( 17477), UINT16_C( 8446), UINT16_C( 18806), UINT16_C( 12133), + UINT16_C( 5081), UINT16_C( 4438), UINT16_C( 16954), UINT16_C( 6981) }, + INT8_C( 5), + { UINT32_C( 43252516), UINT32_C( 17247027), UINT32_C( 76982462), UINT32_C( 1515216) } }, + { { UINT32_C( 1516684), UINT32_C( 1565377), UINT32_C( 1529870), UINT32_C( 1550536) }, + { UINT16_C( 12746), UINT16_C( 6401), UINT16_C( 14666), UINT16_C( 8345), + UINT16_C( 10790), UINT16_C( 12599), UINT16_C( 3610), UINT16_C( 9232) }, + { UINT16_C( 3677), UINT16_C( 1991), UINT16_C( 3971), UINT16_C( 8269), + UINT16_C( 10848), UINT16_C( 14011), UINT16_C( 7535), UINT16_C( 4999) }, + INT8_C( 1), + { UINT32_C( 22999574), UINT32_C( 26649986), UINT32_C( 8717380), 
UINT32_C( 19931448) } }, + { { UINT32_C( 179695), UINT32_C( 1633448), UINT32_C( 1361260), UINT32_C( 886103) }, + { UINT16_C( 5504), UINT16_C( 15015), UINT16_C( 15563), UINT16_C( 5303), + UINT16_C( 16734), UINT16_C( 4420), UINT16_C( 4520), UINT16_C( 4370) }, + { UINT16_C( 19267), UINT16_C( 2332), UINT16_C( 11249), UINT16_C( 18799), + UINT16_C( 13891), UINT16_C( 5895), UINT16_C( 13534), UINT16_C( 12884) }, + INT8_C( 4), + { UINT32_C(232631689), UINT32_C( 63031668), UINT32_C( 64148580), UINT32_C( 61589773) } }, + { { UINT32_C( 35822), UINT32_C( 672887), UINT32_C( 327136), UINT32_C( 1985525) }, + { UINT16_C( 10533), UINT16_C( 1227), UINT16_C( 18158), UINT16_C( 7741), + UINT16_C( 19212), UINT16_C( 13701), UINT16_C( 19168), UINT16_C( 18807) }, + { UINT16_C( 17532), UINT16_C( 7503), UINT16_C( 7017), UINT16_C( 18649), + UINT16_C( 4424), UINT16_C( 6986), UINT16_C( 11514), UINT16_C( 9620) }, + INT8_C( 4), + { UINT32_C( 85029710), UINT32_C( 61286111), UINT32_C( 85126368), UINT32_C( 85187693) } }, + { { UINT32_C( 557761), UINT32_C( 279493), UINT32_C( 821030), UINT32_C( 1682208) }, + { UINT16_C( 17401), UINT16_C( 9083), UINT16_C( 13665), UINT16_C( 19890), + UINT16_C( 11596), UINT16_C( 16520), UINT16_C( 343), UINT16_C( 880) }, + { UINT16_C( 6411), UINT16_C( 5820), UINT16_C( 4373), UINT16_C( 9293), + UINT16_C( 11919), UINT16_C( 11552), UINT16_C( 19614), UINT16_C( 15450) }, + INT8_C( 7), + { UINT32_C(179715961), UINT32_C(255513493), UINT32_C( 6120380), UINT32_C( 15278208) } }, + { { UINT32_C( 278083), UINT32_C( 790523), UINT32_C( 1050079), UINT32_C( 1535143) }, + { UINT16_C( 15332), UINT16_C( 19391), UINT16_C( 1377), UINT16_C( 14311), + UINT16_C( 19662), UINT16_C( 9467), UINT16_C( 3839), UINT16_C( 14765) }, + { UINT16_C( 16834), UINT16_C( 5587), UINT16_C( 13414), UINT16_C( 4613), + UINT16_C( 5986), UINT16_C( 6482), UINT16_C( 8238), UINT16_C( 9616) }, + INT8_C( 0), + { UINT32_C(331268191), UINT32_C(160158001), UINT32_C( 65675805), UINT32_C(250089153) } }, + { { UINT32_C( 673192), 
UINT32_C( 1057014), UINT32_C( 587709), UINT32_C( 1164643) }, + { UINT16_C( 16534), UINT16_C( 16353), UINT16_C( 11217), UINT16_C( 14975), + UINT16_C( 19641), UINT16_C( 12295), UINT16_C( 3505), UINT16_C( 5527) }, + { UINT16_C( 14295), UINT16_C( 19182), UINT16_C( 1458), UINT16_C( 1238), + UINT16_C( 10902), UINT16_C( 12367), UINT16_C( 13167), UINT16_C( 12804) }, + INT8_C( 7), + { UINT32_C(252156556), UINT32_C(158482194), UINT32_C( 45465729), UINT32_C( 71932351) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint16x8_t v = simde_vld1q_u16(test_vec[i].v); + simde_uint32x4_t r; + SIMDE_CONSTIFY_8_(simde_vmlal_high_laneq_u16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlal_high_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t b[8]; + uint16_t v[4]; + int8_t lane; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 420634), UINT32_C( 293005), UINT32_C( 407798), UINT32_C( 1465890) }, + { UINT16_C( 3558), UINT16_C( 9827), UINT16_C( 6927), UINT16_C( 613), + UINT16_C( 19349), UINT16_C( 14878), UINT16_C( 19617), UINT16_C( 9078) }, + { UINT16_C( 8390), UINT16_C( 10747), UINT16_C( 15158), UINT16_C( 4748) }, + INT8_C( 3), + { UINT32_C( 92289686), UINT32_C( 70933749), UINT32_C( 93549314), UINT32_C( 44568234) } }, + { { UINT32_C( 1737289), UINT32_C( 297926), UINT32_C( 308256), UINT32_C( 766049) }, + { UINT16_C( 17935), UINT16_C( 15073), UINT16_C( 15854), UINT16_C( 12815), + UINT16_C( 4185), UINT16_C( 12810), UINT16_C( 12448), UINT16_C( 3050) }, + { UINT16_C( 853), UINT16_C( 8241), UINT16_C( 14175), UINT16_C( 3816) }, + INT8_C( 0), + { UINT32_C( 5307094), UINT32_C( 11224856), UINT32_C( 10926400), UINT32_C( 3367699) } }, + { { UINT32_C( 71249), 
UINT32_C( 1172186), UINT32_C( 843078), UINT32_C( 1431404) }, + { UINT16_C( 16237), UINT16_C( 2382), UINT16_C( 3646), UINT16_C( 19155), + UINT16_C( 12263), UINT16_C( 19738), UINT16_C( 1221), UINT16_C( 8080) }, + { UINT16_C( 13170), UINT16_C( 8819), UINT16_C( 18220), UINT16_C( 11123) }, + INT8_C( 3), + { UINT32_C(136472598), UINT32_C(220717960), UINT32_C( 14424261), UINT32_C( 91305244) } }, + { { UINT32_C( 73397), UINT32_C( 105926), UINT32_C( 843999), UINT32_C( 3264) }, + { UINT16_C( 12288), UINT16_C( 5295), UINT16_C( 18343), UINT16_C( 3415), + UINT16_C( 11416), UINT16_C( 12486), UINT16_C( 17062), UINT16_C( 9676) }, + { UINT16_C( 13059), UINT16_C( 3087), UINT16_C( 11677), UINT16_C( 4325) }, + INT8_C( 0), + { UINT32_C(149154941), UINT32_C(163160600), UINT32_C(223656657), UINT32_C(126362148) } }, + { { UINT32_C( 734791), UINT32_C( 1657848), UINT32_C( 424852), UINT32_C( 1242994) }, + { UINT16_C( 11986), UINT16_C( 7890), UINT16_C( 14401), UINT16_C( 18381), + UINT16_C( 17965), UINT16_C( 16613), UINT16_C( 2409), UINT16_C( 18508) }, + { UINT16_C( 13721), UINT16_C( 11302), UINT16_C( 3779), UINT16_C( 13766) }, + INT8_C( 2), + { UINT32_C( 68624526), UINT32_C( 64438375), UINT32_C( 9528463), UINT32_C( 71184726) } }, + { { UINT32_C( 1456657), UINT32_C( 713432), UINT32_C( 1448872), UINT32_C( 475639) }, + { UINT16_C( 1136), UINT16_C( 5844), UINT16_C( 2586), UINT16_C( 3257), + UINT16_C( 5857), UINT16_C( 5330), UINT16_C( 5152), UINT16_C( 14123) }, + { UINT16_C( 8401), UINT16_C( 18368), UINT16_C( 1802), UINT16_C( 6512) }, + INT8_C( 2), + { UINT32_C( 12010971), UINT32_C( 10318092), UINT32_C( 10732776), UINT32_C( 25925285) } }, + { { UINT32_C( 1924275), UINT32_C( 868137), UINT32_C( 856472), UINT32_C( 331562) }, + { UINT16_C( 4671), UINT16_C( 343), UINT16_C( 17990), UINT16_C( 5319), + UINT16_C( 17163), UINT16_C( 2321), UINT16_C( 1233), UINT16_C( 11827) }, + { UINT16_C( 15272), UINT16_C( 16570), UINT16_C( 13202), UINT16_C( 13685) }, + INT8_C( 2), + { UINT32_C(228510201), UINT32_C( 
31509979), UINT32_C( 17134538), UINT32_C(156471616) } }, + { { UINT32_C( 1268606), UINT32_C( 1863775), UINT32_C( 284716), UINT32_C( 1590387) }, + { UINT16_C( 18463), UINT16_C( 16489), UINT16_C( 7049), UINT16_C( 11593), + UINT16_C( 18659), UINT16_C( 5537), UINT16_C( 16458), UINT16_C( 16115) }, + { UINT16_C( 18417), UINT16_C( 9423), UINT16_C( 8975), UINT16_C( 2463) }, + INT8_C( 3), + { UINT32_C( 47225723), UINT32_C( 15501406), UINT32_C( 40820770), UINT32_C( 41281632) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint16x4_t v = simde_vld1_u16(test_vec[i].v); + simde_uint32x4_t r; + SIMDE_CONSTIFY_4_(simde_vmlal_high_lane_u16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlal_high_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t b[4]; + uint32_t v[4]; + int8_t lane; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C( 581865556), UINT64_C( 1961421217) }, + { UINT32_C( 587948), UINT32_C( 928289), UINT32_C( 1193217), UINT32_C( 66551) }, + { UINT32_C( 1943855), UINT32_C( 1254342), UINT32_C( 1608561), UINT32_C( 818907) }, + INT8_C( 0), + { UINT64_C(2320022697091), UINT64_C( 131326915322) } }, + { { UINT64_C( 23584783), UINT64_C( 646857139) }, + { UINT32_C( 1821357), UINT32_C( 1994247), UINT32_C( 1905983), UINT32_C( 1972490) }, + { UINT32_C( 279410), UINT32_C( 982246), UINT32_C( 408226), UINT32_C( 899202) }, + INT8_C( 0), + { UINT64_C( 532574294813), UINT64_C( 551780288039) } }, + { { UINT64_C( 1695604807), UINT64_C( 1426639515) }, + { UINT32_C( 731), UINT32_C( 1180120), UINT32_C( 247965), UINT32_C( 1134152) }, + { UINT32_C( 1495374), UINT32_C( 1527916), UINT32_C( 1647922), UINT32_C( 1898499) }, + INT8_C( 3), + { UINT64_C( 472456909342), 
UINT64_C(2154613077363) } }, + { { UINT64_C( 275591214), UINT64_C( 291318976) }, + { UINT32_C( 597721), UINT32_C( 1044352), UINT32_C( 385596), UINT32_C( 373887) }, + { UINT32_C( 1046650), UINT32_C( 1848883), UINT32_C( 1370487), UINT32_C( 1319755) }, + INT8_C( 0), + { UINT64_C( 403859644614), UINT64_C( 391620147526) } }, + { { UINT64_C( 1078618217), UINT64_C( 1418666301) }, + { UINT32_C( 622684), UINT32_C( 1794624), UINT32_C( 40910), UINT32_C( 1531594) }, + { UINT32_C( 1362106), UINT32_C( 1543402), UINT32_C( 576586), UINT32_C( 122961) }, + INT8_C( 2), + { UINT64_C( 24666751477), UINT64_C( 884514324385) } }, + { { UINT64_C( 1059991594), UINT64_C( 1685059887) }, + { UINT32_C( 1654297), UINT32_C( 675528), UINT32_C( 874539), UINT32_C( 1639672) }, + { UINT32_C( 991106), UINT32_C( 823282), UINT32_C( 1390309), UINT32_C( 1057691) }, + INT8_C( 2), + { UINT64_C(1216939434145), UINT64_C(2281335798535) } }, + { { UINT64_C( 2400707), UINT64_C( 1301244679) }, + { UINT32_C( 559021), UINT32_C( 1672652), UINT32_C( 1227845), UINT32_C( 1237646) }, + { UINT32_C( 412574), UINT32_C( 1580173), UINT32_C( 1347008), UINT32_C( 790812) }, + INT8_C( 2), + { UINT64_C(1653919438467), UINT64_C(1668420307847) } }, + { { UINT64_C( 1312259781), UINT64_C( 1662531619) }, + { UINT32_C( 377502), UINT32_C( 1774224), UINT32_C( 1399533), UINT32_C( 1819326) }, + { UINT32_C( 968072), UINT32_C( 1555748), UINT32_C( 1537622), UINT32_C( 611277) }, + INT8_C( 0), + { UINT64_C(1356160970157), UINT64_C(1762901091091) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint32x4_t v = simde_vld1q_u32(test_vec[i].v); + simde_uint64x2_t r; + SIMDE_CONSTIFY_4_(simde_vmlal_high_laneq_u32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + +static int 
+test_simde_vmlal_high_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t b[4]; + uint32_t v[2]; + int8_t lane; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C( 1428131384), UINT64_C( 1941064234) }, + { UINT32_C( 1491368), UINT32_C( 680507), UINT32_C( 1340293), UINT32_C( 1164242) }, + { UINT32_C( 1520576), UINT32_C( 561467) }, + INT8_C( 1), + { UINT64_C( 753958421215), UINT64_C( 655624527248) } }, + { { UINT64_C( 1434849562), UINT64_C( 902513617) }, + { UINT32_C( 742665), UINT32_C( 1763018), UINT32_C( 1692445), UINT32_C( 194431) }, + { UINT32_C( 1598735), UINT32_C( 1196324) }, + INT8_C( 1), + { UINT64_C(2026147421742), UINT64_C( 233504985261) } }, + { { UINT64_C( 55247748), UINT64_C( 1266733675) }, + { UINT32_C( 290891), UINT32_C( 1869978), UINT32_C( 984496), UINT32_C( 795253) }, + { UINT32_C( 416879), UINT32_C( 1132903) }, + INT8_C( 0), + { UINT64_C( 410470955732), UINT64_C( 332791009062) } }, + { { UINT64_C( 1195519168), UINT64_C( 1592682824) }, + { UINT32_C( 10452), UINT32_C( 478545), UINT32_C( 1870027), UINT32_C( 1825678) }, + { UINT32_C( 1188188), UINT32_C( 1700521) }, + INT8_C( 0), + { UINT64_C(2223139160244), UINT64_C(2170841374288) } }, + { { UINT64_C( 272189059), UINT64_C( 312207824) }, + { UINT32_C( 1162028), UINT32_C( 878095), UINT32_C( 1078069), UINT32_C( 1970971) }, + { UINT32_C( 199568), UINT32_C( 365021) }, + INT8_C( 0), + { UINT64_C( 215420263251), UINT64_C( 393654948352) } }, + { { UINT64_C( 1025265761), UINT64_C( 1590405508) }, + { UINT32_C( 1314714), UINT32_C( 569364), UINT32_C( 579195), UINT32_C( 1097124) }, + { UINT32_C( 1944630), UINT32_C( 579203) }, + INT8_C( 1), + { UINT64_C( 336496747346), UINT64_C( 637047917680) } }, + { { UINT64_C( 125528816), UINT64_C( 1105835591) }, + { UINT32_C( 1547539), UINT32_C( 534056), UINT32_C( 671409), UINT32_C( 1718709) }, + { UINT32_C( 1330530), UINT32_C( 24974) }, + INT8_C( 0), + { UINT64_C( 893455345586), UINT64_C(2287899721361) } }, + { { UINT64_C( 1637747186), 
UINT64_C( 1003847865) }, + { UINT32_C( 18831), UINT32_C( 781780), UINT32_C( 1837816), UINT32_C( 1298414) }, + { UINT32_C( 1991192), UINT32_C( 33878) }, + INT8_C( 0), + { UINT64_C(3661082263858), UINT64_C(2586395417353) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint32x2_t v = simde_vld1_u32(test_vec[i].v); + simde_uint64x2_t r; + SIMDE_CONSTIFY_2_(simde_vmlal_high_lane_u32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_lane_u32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_laneq_u32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/mls_lane.c b/test/arm/neon/mls_lane.c new file mode 100644 index 000000000..12d861722 --- /dev/null +++ b/test/arm/neon/mls_lane.c @@ -0,0 +1,1576 @@ + +#define SIMDE_TEST_ARM_NEON_INSN mls_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/mls_lane.h" + +SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ + +static int +test_simde_vmls_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[2]; + float b[2]; + float v[4]; + int8_t lane; + float r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 9460.10), SIMDE_FLOAT32_C( 5133.15) }, + { SIMDE_FLOAT32_C( -46.94), SIMDE_FLOAT32_C( 4.95) }, + { SIMDE_FLOAT32_C( -21.21), SIMDE_FLOAT32_C( 95.76), SIMDE_FLOAT32_C( -75.66), SIMDE_FLOAT32_C( 96.16)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 13955.07), SIMDE_FLOAT32_C( 
4659.14) } }, + { { SIMDE_FLOAT32_C( -8475.37), SIMDE_FLOAT32_C( -3344.46) }, + { SIMDE_FLOAT32_C( -71.00), SIMDE_FLOAT32_C( -74.43) }, + { SIMDE_FLOAT32_C( -44.49), SIMDE_FLOAT32_C( 66.28), SIMDE_FLOAT32_C( 82.23), SIMDE_FLOAT32_C( -29.17)}, + INT8_C( 3), + { SIMDE_FLOAT32_C(-10546.44), SIMDE_FLOAT32_C( -5515.58) } }, + { { SIMDE_FLOAT32_C( -8051.58), SIMDE_FLOAT32_C( -6624.70) }, + { SIMDE_FLOAT32_C( -68.31), SIMDE_FLOAT32_C( -59.05) }, + { SIMDE_FLOAT32_C( -78.34), SIMDE_FLOAT32_C( 64.95), SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 74.16)}, + INT8_C( 2), + { SIMDE_FLOAT32_C( -2211.76), SIMDE_FLOAT32_C( -1576.52) } }, + { { SIMDE_FLOAT32_C( -6072.32), SIMDE_FLOAT32_C( -9665.18) }, + { SIMDE_FLOAT32_C( -88.69), SIMDE_FLOAT32_C( -11.61) }, + { SIMDE_FLOAT32_C( 63.00), SIMDE_FLOAT32_C( 42.22), SIMDE_FLOAT32_C( -34.75), SIMDE_FLOAT32_C( -58.12)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( -484.85), SIMDE_FLOAT32_C( -8933.75) } }, + { { SIMDE_FLOAT32_C( -5844.94), SIMDE_FLOAT32_C( 1706.98) }, + { SIMDE_FLOAT32_C( -47.12), SIMDE_FLOAT32_C( -45.01) }, + { SIMDE_FLOAT32_C( -17.06), SIMDE_FLOAT32_C( -90.29), SIMDE_FLOAT32_C( 67.45), SIMDE_FLOAT32_C( -45.84)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( -6648.81), SIMDE_FLOAT32_C( 939.11) } }, + { { SIMDE_FLOAT32_C( 7278.68), SIMDE_FLOAT32_C( -9305.98) }, + { SIMDE_FLOAT32_C( 19.76), SIMDE_FLOAT32_C( 52.33) }, + { SIMDE_FLOAT32_C( -81.30), SIMDE_FLOAT32_C( -80.15), SIMDE_FLOAT32_C( -15.48), SIMDE_FLOAT32_C( -31.32)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 8862.44), SIMDE_FLOAT32_C( -5111.73) } }, + { { SIMDE_FLOAT32_C( 4924.02), SIMDE_FLOAT32_C( 6091.74) }, + { SIMDE_FLOAT32_C( -73.42), SIMDE_FLOAT32_C( -6.69) }, + { SIMDE_FLOAT32_C( -55.97), SIMDE_FLOAT32_C( -71.24), SIMDE_FLOAT32_C( 21.51), SIMDE_FLOAT32_C( 23.98)}, + INT8_C( 2), + { SIMDE_FLOAT32_C( 6503.28), SIMDE_FLOAT32_C( 6235.64) } }, + { { SIMDE_FLOAT32_C( 2182.24), SIMDE_FLOAT32_C( 505.13) }, + { SIMDE_FLOAT32_C( 21.83), SIMDE_FLOAT32_C( 45.14) }, + { SIMDE_FLOAT32_C( -73.69), 
SIMDE_FLOAT32_C( 63.75), SIMDE_FLOAT32_C( -10.02), SIMDE_FLOAT32_C( 25.19)}, + INT8_C( 3), + { SIMDE_FLOAT32_C( 1632.34), SIMDE_FLOAT32_C( -631.95) } }, + }; + + simde_float32x2_t r, a, b; + simde_float32x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_f32(test_vec[i].a); + b = simde_vld1_f32(test_vec[i].b); + v = simde_vld1q_f32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_laneq_f32(a, b, v, 0); break; + case 1: r = simde_vmls_laneq_f32(a, b, v, 1); break; + case 2: r = simde_vmls_laneq_f32(a, b, v, 2); break; + case 3: r = simde_vmls_laneq_f32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_f32(0); break; + } + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vmls_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[2]; + float b[2]; + float v[2]; + int8_t lane; + float r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -6873.77), SIMDE_FLOAT32_C( 9485.11) }, + { SIMDE_FLOAT32_C( -68.87), SIMDE_FLOAT32_C( 98.20) }, + { SIMDE_FLOAT32_C( -95.02), SIMDE_FLOAT32_C( -15.71)}, + INT8_C( 0), + { SIMDE_FLOAT32_C(-13417.80), SIMDE_FLOAT32_C( 18816.07) } }, + { { SIMDE_FLOAT32_C( -3003.07), SIMDE_FLOAT32_C( -8337.70) }, + { SIMDE_FLOAT32_C( 29.64), SIMDE_FLOAT32_C( -12.68) }, + { SIMDE_FLOAT32_C( -97.40), SIMDE_FLOAT32_C( 90.45)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( -116.13), SIMDE_FLOAT32_C( -9572.73) } }, + { { SIMDE_FLOAT32_C( 3537.35), SIMDE_FLOAT32_C( 9430.40) }, + { SIMDE_FLOAT32_C( -83.99), SIMDE_FLOAT32_C( -6.05) }, + { SIMDE_FLOAT32_C( -3.56), SIMDE_FLOAT32_C( 92.42)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( 3238.35), SIMDE_FLOAT32_C( 9408.86) } }, + { { SIMDE_FLOAT32_C( -3238.57), SIMDE_FLOAT32_C( -2004.28) }, + { SIMDE_FLOAT32_C( -50.70), SIMDE_FLOAT32_C( -54.85) }, + { SIMDE_FLOAT32_C( -98.12), SIMDE_FLOAT32_C( 59.24)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( -235.10), SIMDE_FLOAT32_C( 1245.03) } }, + { { SIMDE_FLOAT32_C( 
6692.30), SIMDE_FLOAT32_C( -8729.98) }, + { SIMDE_FLOAT32_C( 83.68), SIMDE_FLOAT32_C( -94.71) }, + { SIMDE_FLOAT32_C( 87.24), SIMDE_FLOAT32_C( 45.14)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( -607.94), SIMDE_FLOAT32_C( -467.48) } }, + { { SIMDE_FLOAT32_C( 2413.77), SIMDE_FLOAT32_C( 85.79) }, + { SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( -90.46) }, + { SIMDE_FLOAT32_C( -21.94), SIMDE_FLOAT32_C( 17.07)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 2302.64), SIMDE_FLOAT32_C( 1629.94) } }, + { { SIMDE_FLOAT32_C( 6994.38), SIMDE_FLOAT32_C( 2861.74) }, + { SIMDE_FLOAT32_C( -90.54), SIMDE_FLOAT32_C( 96.29) }, + { SIMDE_FLOAT32_C( 66.85), SIMDE_FLOAT32_C( 12.04)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 8084.48), SIMDE_FLOAT32_C( 1702.41) } }, + { { SIMDE_FLOAT32_C( 1324.60), SIMDE_FLOAT32_C( -6880.65) }, + { SIMDE_FLOAT32_C( -88.42), SIMDE_FLOAT32_C( -18.72) }, + { SIMDE_FLOAT32_C( 17.66), SIMDE_FLOAT32_C( 68.18)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 7353.08), SIMDE_FLOAT32_C( -5604.32) } }, + }; + + simde_float32x2_t r, a, b, v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_f32(test_vec[i].a); + b = simde_vld1_f32(test_vec[i].b); + v = simde_vld1_f32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_lane_f32(a, b, v, 0); break; + case 1: r = simde_vmls_lane_f32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_f32(0); break; + } + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + return 0; +} +static int +test_simde_vmls_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t b[4]; + int16_t v[8]; + int8_t lane; + int16_t r[4]; + } test_vec[] = { + { { -INT16_C( 7117), INT16_C( 1424), -INT16_C( 7491), -INT16_C( 9317) }, + { INT16_C( 51), INT16_C( 91), INT16_C( 74), -INT16_C( 35) }, + { -INT16_C( 46), -INT16_C( 56), -INT16_C( 9), -INT16_C( 93), + INT16_C( 96), -INT16_C( 68), -INT16_C( 96), INT16_C( 0)}, + INT8_C( 2), + { -INT16_C( 6658), INT16_C( 2243), -INT16_C( 6825), -INT16_C( 
9632) } }, + { { -INT16_C( 8618), INT16_C( 2840), -INT16_C( 6126), INT16_C( 6799) }, + { -INT16_C( 69), -INT16_C( 94), -INT16_C( 40), INT16_C( 44) }, + { INT16_C( 46), -INT16_C( 35), -INT16_C( 50), INT16_C( 50), + -INT16_C( 19), -INT16_C( 90), INT16_C( 49), INT16_C( 17)}, + INT8_C( 7), + { -INT16_C( 7445), INT16_C( 4438), -INT16_C( 5446), INT16_C( 6051) } }, + { { INT16_C( 9015), -INT16_C( 4740), -INT16_C( 4394), INT16_C( 9290) }, + { -INT16_C( 31), INT16_C( 21), INT16_C( 66), INT16_C( 70) }, + { INT16_C( 18), -INT16_C( 52), INT16_C( 77), -INT16_C( 18), + INT16_C( 68), -INT16_C( 92), -INT16_C( 9), INT16_C( 44)}, + INT8_C( 1), + { INT16_C( 7403), -INT16_C( 3648), -INT16_C( 962), INT16_C( 12930) } }, + { { INT16_C( 4745), -INT16_C( 5011), INT16_C( 2800), -INT16_C( 7710) }, + { -INT16_C( 79), -INT16_C( 98), INT16_C( 38), -INT16_C( 75) }, + { -INT16_C( 64), -INT16_C( 52), INT16_C( 30), -INT16_C( 33), + -INT16_C( 34), -INT16_C( 35), -INT16_C( 89), -INT16_C( 78)}, + INT8_C( 4), + { INT16_C( 2059), -INT16_C( 8343), INT16_C( 4092), -INT16_C( 10260) } }, + { { -INT16_C( 3099), INT16_C( 818), -INT16_C( 6722), -INT16_C( 7942) }, + { -INT16_C( 7), INT16_C( 84), -INT16_C( 96), INT16_C( 29) }, + { -INT16_C( 19), INT16_C( 18), INT16_C( 8), INT16_C( 36), + INT16_C( 98), -INT16_C( 4), -INT16_C( 85), -INT16_C( 21)}, + INT8_C( 2), + { -INT16_C( 3043), INT16_C( 146), -INT16_C( 5954), -INT16_C( 8174) } }, + { { INT16_C( 85), INT16_C( 1723), INT16_C( 9151), -INT16_C( 8826) }, + { -INT16_C( 9), INT16_C( 31), INT16_C( 59), -INT16_C( 41) }, + { -INT16_C( 10), -INT16_C( 9), -INT16_C( 30), -INT16_C( 22), + INT16_C( 78), -INT16_C( 57), -INT16_C( 52), -INT16_C( 20)}, + INT8_C( 2), + { -INT16_C( 185), INT16_C( 2653), INT16_C( 10921), -INT16_C( 10056) } }, + { { -INT16_C( 1161), -INT16_C( 7530), INT16_C( 9859), INT16_C( 9165) }, + { -INT16_C( 1), INT16_C( 66), -INT16_C( 95), INT16_C( 93) }, + { -INT16_C( 46), INT16_C( 73), -INT16_C( 24), INT16_C( 69), + INT16_C( 73), -INT16_C( 76), -INT16_C( 
99), -INT16_C( 72)}, + INT8_C( 1), + { -INT16_C( 1088), -INT16_C( 12348), INT16_C( 16794), INT16_C( 2376) } }, + { { -INT16_C( 4474), INT16_C( 3652), -INT16_C( 45), INT16_C( 9361) }, + { -INT16_C( 70), INT16_C( 30), -INT16_C( 58), INT16_C( 85) }, + { -INT16_C( 88), INT16_C( 27), -INT16_C( 27), -INT16_C( 85), + INT16_C( 57), INT16_C( 70), -INT16_C( 5), INT16_C( 95)}, + INT8_C( 4), + { -INT16_C( 484), INT16_C( 1942), INT16_C( 3261), INT16_C( 4516) } }, + }; + + simde_int16x4_t r, a, b; + simde_int16x8_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_s16(test_vec[i].a); + b = simde_vld1_s16(test_vec[i].b); + v = simde_vld1q_s16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vmls_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vmls_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vmls_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vmls_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vmls_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vmls_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vmls_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s16(0); break; + } + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t b[4]; + int16_t v[4]; + int8_t lane; + int16_t r[4]; + } test_vec[] = { + { { -INT16_C( 844), INT16_C( 1656), -INT16_C( 9225), -INT16_C( 3760) }, + { -INT16_C( 57), INT16_C( 21), -INT16_C( 70), -INT16_C( 44) }, + { -INT16_C( 54), INT16_C( 72), -INT16_C( 79), INT16_C( 91)}, + INT8_C( 1), + { INT16_C( 3260), INT16_C( 144), -INT16_C( 4185), -INT16_C( 592) } }, + { { -INT16_C( 1909), INT16_C( 5115), -INT16_C( 6686), -INT16_C( 1069) }, + { INT16_C( 17), INT16_C( 25), INT16_C( 75), INT16_C( 85) }, + { INT16_C( 5), -INT16_C( 78), INT16_C( 60), -INT16_C( 8)}, + INT8_C( 0), + { -INT16_C( 
1994), INT16_C( 4990), -INT16_C( 7061), -INT16_C( 1494) } }, + { { -INT16_C( 4046), -INT16_C( 8146), INT16_C( 505), -INT16_C( 1341) }, + { -INT16_C( 12), -INT16_C( 5), -INT16_C( 28), INT16_C( 29) }, + { -INT16_C( 14), INT16_C( 78), -INT16_C( 52), INT16_C( 0)}, + INT8_C( 3), + { -INT16_C( 4046), -INT16_C( 8146), INT16_C( 505), -INT16_C( 1341) } }, + { { -INT16_C( 5833), INT16_C( 786), INT16_C( 5359), -INT16_C( 1361) }, + { -INT16_C( 50), -INT16_C( 63), -INT16_C( 97), -INT16_C( 88) }, + { INT16_C( 22), -INT16_C( 1), INT16_C( 78), -INT16_C( 70)}, + INT8_C( 0), + { -INT16_C( 4733), INT16_C( 2172), INT16_C( 7493), INT16_C( 575) } }, + { { -INT16_C( 758), INT16_C( 1090), INT16_C( 4857), INT16_C( 3860) }, + { INT16_C( 65), INT16_C( 86), INT16_C( 64), INT16_C( 48) }, + { -INT16_C( 80), -INT16_C( 18), INT16_C( 76), INT16_C( 96)}, + INT8_C( 2), + { -INT16_C( 5698), -INT16_C( 5446), -INT16_C( 7), INT16_C( 212) } }, + { { -INT16_C( 4433), -INT16_C( 7839), INT16_C( 4286), -INT16_C( 3346) }, + { INT16_C( 72), INT16_C( 88), INT16_C( 99), -INT16_C( 11) }, + { -INT16_C( 2), -INT16_C( 18), -INT16_C( 51), INT16_C( 34)}, + INT8_C( 0), + { -INT16_C( 4289), -INT16_C( 7663), INT16_C( 4484), -INT16_C( 3368) } }, + { { INT16_C( 537), INT16_C( 902), INT16_C( 7163), -INT16_C( 6624) }, + { -INT16_C( 69), -INT16_C( 24), -INT16_C( 65), INT16_C( 18) }, + { -INT16_C( 38), -INT16_C( 98), -INT16_C( 5), INT16_C( 31)}, + INT8_C( 2), + { INT16_C( 192), INT16_C( 782), INT16_C( 6838), -INT16_C( 6534) } }, + { { -INT16_C( 7137), INT16_C( 4022), -INT16_C( 7194), -INT16_C( 5087) }, + { -INT16_C( 96), -INT16_C( 10), INT16_C( 36), INT16_C( 26) }, + { INT16_C( 39), -INT16_C( 28), -INT16_C( 28), INT16_C( 24)}, + INT8_C( 3), + { -INT16_C( 4833), INT16_C( 4262), -INT16_C( 8058), -INT16_C( 5711) } }, + }; + + simde_int16x4_t r, a, b, v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_s16(test_vec[i].a); + b = simde_vld1_s16(test_vec[i].b); + v = simde_vld1_s16(test_vec[i].v); + switch(test_vec[i].lane) { + 
case 0: r = simde_vmls_lane_s16(a, b, v, 0); break; + case 1: r = simde_vmls_lane_s16(a, b, v, 1); break; + case 2: r = simde_vmls_lane_s16(a, b, v, 2); break; + case 3: r = simde_vmls_lane_s16(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s16(0); break; + } + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t b[2]; + int32_t v[4]; + int8_t lane; + int32_t r[2]; + } test_vec[] = { + { { -INT32_C( 4609779), -INT32_C( 6446057) }, + { -INT32_C( 30), INT32_C( 9280) }, + { -INT32_C( 6491), -INT32_C( 7148), -INT32_C( 1963), -INT32_C( 7675)}, + INT8_C( 1), + { -INT32_C( 4824219), INT32_C( 59887383) } }, + { { INT32_C( 8476789), INT32_C( 9001967) }, + { INT32_C( 8417), INT32_C( 9240) }, + { INT32_C( 7274), -INT32_C( 7072), INT32_C( 8316), INT32_C( 1349)}, + INT8_C( 0), + { -INT32_C( 52748469), -INT32_C( 58209793) } }, + { { -INT32_C( 5257596), -INT32_C( 5794779) }, + { INT32_C( 5556), -INT32_C( 6177) }, + { INT32_C( 5916), -INT32_C( 4267), -INT32_C( 3838), -INT32_C( 6943)}, + INT8_C( 3), + { INT32_C( 33317712), -INT32_C( 48681690) } }, + { { -INT32_C( 2819565), -INT32_C( 3360540) }, + { -INT32_C( 4591), -INT32_C( 8694) }, + { -INT32_C( 2105), INT32_C( 9150), -INT32_C( 1627), -INT32_C( 4269)}, + INT8_C( 2), + { -INT32_C( 10289122), -INT32_C( 17505678) } }, + { { -INT32_C( 1360462), INT32_C( 1237828) }, + { INT32_C( 4841), INT32_C( 5659) }, + { INT32_C( 7557), INT32_C( 8904), -INT32_C( 6536), INT32_C( 2682)}, + INT8_C( 3), + { -INT32_C( 14344024), -INT32_C( 13939610) } }, + { { -INT32_C( 6369807), INT32_C( 3325182) }, + { INT32_C( 5679), INT32_C( 3989) }, + { INT32_C( 6822), -INT32_C( 2039), INT32_C( 6158), -INT32_C( 8783)}, + INT8_C( 0), + { -INT32_C( 45111945), -INT32_C( 23887776) } }, + { { INT32_C( 3683666), -INT32_C( 2734501) }, + { INT32_C( 2272), -INT32_C( 8611) }, + { -INT32_C( 9339), 
INT32_C( 7011), INT32_C( 13), -INT32_C( 5892)}, + INT8_C( 0), + { INT32_C( 24901874), -INT32_C( 83152630) } }, + { { INT32_C( 5399057), -INT32_C( 5721902) }, + { INT32_C( 9872), -INT32_C( 9010) }, + { -INT32_C( 6643), INT32_C( 1460), -INT32_C( 1642), -INT32_C( 1245)}, + INT8_C( 3), + { INT32_C( 17689697), -INT32_C( 16939352) } }, + }; + + simde_int32x2_t r, a, b; + simde_int32x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_s32(test_vec[i].a); + b = simde_vld1_s32(test_vec[i].b); + v = simde_vld1q_s32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vmls_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vmls_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vmls_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s32(0); break; + } + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t b[2]; + int32_t v[2]; + int8_t lane; + int32_t r[2]; + } test_vec[] = { + { { -INT32_C( 2080167), INT32_C( 2975351) }, + { INT32_C( 3022), INT32_C( 7220) }, + { INT32_C( 3092), INT32_C( 9706)}, + INT8_C( 0), + { -INT32_C( 11424191), -INT32_C( 19348889) } }, + { { -INT32_C( 465851), INT32_C( 658061) }, + { INT32_C( 4649), INT32_C( 1947) }, + { INT32_C( 2147), -INT32_C( 6341)}, + INT8_C( 1), + { INT32_C( 29013458), INT32_C( 13003988) } }, + { { INT32_C( 1705178), -INT32_C( 2538948) }, + { -INT32_C( 9658), -INT32_C( 1207) }, + { INT32_C( 7192), INT32_C( 3209)}, + INT8_C( 0), + { INT32_C( 71165514), INT32_C( 6141796) } }, + { { -INT32_C( 8958043), INT32_C( 3475949) }, + { INT32_C( 2014), INT32_C( 3915) }, + { INT32_C( 6642), INT32_C( 3411)}, + INT8_C( 1), + { -INT32_C( 15827797), -INT32_C( 9878116) } }, + { { -INT32_C( 1876829), INT32_C( 3712281) }, + { -INT32_C( 1337), INT32_C( 9030) }, + { -INT32_C( 9018), -INT32_C( 
310)}, + INT8_C( 1), + { -INT32_C( 2291299), INT32_C( 6511581) } }, + { { INT32_C( 4168416), -INT32_C( 4475757) }, + { -INT32_C( 2360), INT32_C( 7459) }, + { -INT32_C( 1695), -INT32_C( 4793)}, + INT8_C( 0), + { INT32_C( 168216), INT32_C( 8167248) } }, + { { -INT32_C( 690697), -INT32_C( 2328289) }, + { INT32_C( 2952), INT32_C( 9586) }, + { -INT32_C( 6161), -INT32_C( 2789)}, + INT8_C( 1), + { INT32_C( 7542431), INT32_C( 24407065) } }, + { { -INT32_C( 4863001), INT32_C( 1998250) }, + { -INT32_C( 9095), INT32_C( 3603) }, + { -INT32_C( 126), -INT32_C( 9930)}, + INT8_C( 1), + { -INT32_C( 95176351), INT32_C( 37776040) } }, + }; + + simde_int32x2_t r, a, b, v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_s32(test_vec[i].a); + b = simde_vld1_s32(test_vec[i].b); + v = simde_vld1_s32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_lane_s32(a, b, v, 0); break; + case 1: r = simde_vmls_lane_s32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s32(0); break; + } + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t b[4]; + uint16_t v[8]; + int8_t lane; + uint16_t r[4]; + } test_vec[] = { + { { UINT16_C( 16158), UINT16_C( 1489), UINT16_C( 1476), UINT16_C( 1624) }, + { UINT16_C( 190), UINT16_C( 62), UINT16_C( 32), UINT16_C( 196) }, + { UINT16_C( 3), UINT16_C( 178), UINT16_C( 81), UINT16_C( 183), + UINT16_C( 15), UINT16_C( 169), UINT16_C( 179), UINT16_C( 6)}, + INT8_C( 1), + { UINT16_C( 47874), UINT16_C( 55989), UINT16_C( 61316), UINT16_C( 32272) } }, + { { UINT16_C( 3456), UINT16_C( 18454), UINT16_C( 16619), UINT16_C( 18609) }, + { UINT16_C( 78), UINT16_C( 11), UINT16_C( 32), UINT16_C( 169) }, + { UINT16_C( 78), UINT16_C( 190), UINT16_C( 83), UINT16_C( 108), + UINT16_C( 165), UINT16_C( 8), UINT16_C( 139), UINT16_C( 15)}, + INT8_C( 4), + { UINT16_C( 56122), UINT16_C( 
16639), UINT16_C( 11339), UINT16_C( 56260) } }, + { { UINT16_C( 16140), UINT16_C( 1449), UINT16_C( 7340), UINT16_C( 4164) }, + { UINT16_C( 135), UINT16_C( 18), UINT16_C( 102), UINT16_C( 163) }, + { UINT16_C( 122), UINT16_C( 63), UINT16_C( 45), UINT16_C( 138), + UINT16_C( 90), UINT16_C( 111), UINT16_C( 13), UINT16_C( 3)}, + INT8_C( 6), + { UINT16_C( 14385), UINT16_C( 1215), UINT16_C( 6014), UINT16_C( 2045) } }, + { { UINT16_C( 7749), UINT16_C( 10798), UINT16_C( 9859), UINT16_C( 14909) }, + { UINT16_C( 104), UINT16_C( 76), UINT16_C( 144), UINT16_C( 139) }, + { UINT16_C( 116), UINT16_C( 190), UINT16_C( 180), UINT16_C( 132), + UINT16_C( 45), UINT16_C( 47), UINT16_C( 132), UINT16_C( 27)}, + INT8_C( 1), + { UINT16_C( 53525), UINT16_C( 61894), UINT16_C( 48035), UINT16_C( 54035) } }, + { { UINT16_C( 474), UINT16_C( 4091), UINT16_C( 10437), UINT16_C( 2089) }, + { UINT16_C( 172), UINT16_C( 33), UINT16_C( 87), UINT16_C( 94) }, + { UINT16_C( 18), UINT16_C( 184), UINT16_C( 158), UINT16_C( 8), + UINT16_C( 76), UINT16_C( 22), UINT16_C( 30), UINT16_C( 122)}, + INT8_C( 3), + { UINT16_C( 64634), UINT16_C( 3827), UINT16_C( 9741), UINT16_C( 1337) } }, + { { UINT16_C( 13153), UINT16_C( 16958), UINT16_C( 1130), UINT16_C( 7923) }, + { UINT16_C( 121), UINT16_C( 21), UINT16_C( 123), UINT16_C( 2) }, + { UINT16_C( 131), UINT16_C( 3), UINT16_C( 71), UINT16_C( 42), + UINT16_C( 120), UINT16_C( 190), UINT16_C( 1), UINT16_C( 121)}, + INT8_C( 3), + { UINT16_C( 8071), UINT16_C( 16076), UINT16_C( 61500), UINT16_C( 7839) } }, + { { UINT16_C( 15740), UINT16_C( 12791), UINT16_C( 14506), UINT16_C( 5419) }, + { UINT16_C( 61), UINT16_C( 152), UINT16_C( 123), UINT16_C( 118) }, + { UINT16_C( 104), UINT16_C( 148), UINT16_C( 151), UINT16_C( 186), + UINT16_C( 115), UINT16_C( 9), UINT16_C( 77), UINT16_C( 27)}, + INT8_C( 2), + { UINT16_C( 6529), UINT16_C( 55375), UINT16_C( 61469), UINT16_C( 53137) } }, + { { UINT16_C( 1816), UINT16_C( 10121), UINT16_C( 4977), UINT16_C( 7363) }, + { UINT16_C( 6), UINT16_C( 164), 
UINT16_C( 12), UINT16_C( 16) }, + { UINT16_C( 146), UINT16_C( 161), UINT16_C( 162), UINT16_C( 50), + UINT16_C( 26), UINT16_C( 54), UINT16_C( 195), UINT16_C( 165)}, + INT8_C( 4), + { UINT16_C( 1660), UINT16_C( 5857), UINT16_C( 4665), UINT16_C( 6947) } }, + }; + + simde_uint16x4_t r, a, b; + simde_uint16x8_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_u16(test_vec[i].a); + b = simde_vld1_u16(test_vec[i].b); + v = simde_vld1q_u16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_laneq_u16(a, b, v, 0); break; + case 1: r = simde_vmls_laneq_u16(a, b, v, 1); break; + case 2: r = simde_vmls_laneq_u16(a, b, v, 2); break; + case 3: r = simde_vmls_laneq_u16(a, b, v, 3); break; + case 4: r = simde_vmls_laneq_u16(a, b, v, 4); break; + case 5: r = simde_vmls_laneq_u16(a, b, v, 5); break; + case 6: r = simde_vmls_laneq_u16(a, b, v, 6); break; + case 7: r = simde_vmls_laneq_u16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u16(0); break; + } + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t b[4]; + uint16_t v[4]; + int8_t lane; + uint16_t r[4]; + } test_vec[] = { + { { UINT16_C( 18984), UINT16_C( 18009), UINT16_C( 13809), UINT16_C( 17765) }, + { UINT16_C( 179), UINT16_C( 96), UINT16_C( 129), UINT16_C( 114) }, + { UINT16_C( 43), UINT16_C( 86), UINT16_C( 106), UINT16_C( 167)}, + INT8_C( 0), + { UINT16_C( 11287), UINT16_C( 13881), UINT16_C( 8262), UINT16_C( 12863) } }, + { { UINT16_C( 12258), UINT16_C( 4109), UINT16_C( 6281), UINT16_C( 16020) }, + { UINT16_C( 100), UINT16_C( 174), UINT16_C( 26), UINT16_C( 75) }, + { UINT16_C( 193), UINT16_C( 65), UINT16_C( 7), UINT16_C( 123)}, + INT8_C( 3), + { UINT16_C( 65494), UINT16_C( 48243), UINT16_C( 3083), UINT16_C( 6795) } }, + { { UINT16_C( 11938), UINT16_C( 2513), UINT16_C( 9492), UINT16_C( 13021) }, + { UINT16_C( 19), 
UINT16_C( 119), UINT16_C( 58), UINT16_C( 146) }, + { UINT16_C( 193), UINT16_C( 73), UINT16_C( 147), UINT16_C( 109)}, + INT8_C( 0), + { UINT16_C( 8271), UINT16_C( 45082), UINT16_C( 63834), UINT16_C( 50379) } }, + { { UINT16_C( 3390), UINT16_C( 3694), UINT16_C( 13872), UINT16_C( 10975) }, + { UINT16_C( 92), UINT16_C( 33), UINT16_C( 159), UINT16_C( 158) }, + { UINT16_C( 61), UINT16_C( 156), UINT16_C( 44), UINT16_C( 42)}, + INT8_C( 1), + { UINT16_C( 54574), UINT16_C( 64082), UINT16_C( 54604), UINT16_C( 51863) } }, + { { UINT16_C( 5809), UINT16_C( 7103), UINT16_C( 15284), UINT16_C( 5993) }, + { UINT16_C( 107), UINT16_C( 47), UINT16_C( 90), UINT16_C( 189) }, + { UINT16_C( 83), UINT16_C( 43), UINT16_C( 122), UINT16_C( 58)}, + INT8_C( 3), + { UINT16_C( 65139), UINT16_C( 4377), UINT16_C( 10064), UINT16_C( 60567) } }, + { { UINT16_C( 9094), UINT16_C( 2904), UINT16_C( 17008), UINT16_C( 2752) }, + { UINT16_C( 147), UINT16_C( 194), UINT16_C( 149), UINT16_C( 28) }, + { UINT16_C( 157), UINT16_C( 140), UINT16_C( 104), UINT16_C( 96)}, + INT8_C( 2), + { UINT16_C( 59342), UINT16_C( 48264), UINT16_C( 1512), UINT16_C( 65376) } }, + { { UINT16_C( 6469), UINT16_C( 19343), UINT16_C( 12256), UINT16_C( 15543) }, + { UINT16_C( 129), UINT16_C( 0), UINT16_C( 37), UINT16_C( 78) }, + { UINT16_C( 186), UINT16_C( 42), UINT16_C( 145), UINT16_C( 31)}, + INT8_C( 3), + { UINT16_C( 2470), UINT16_C( 19343), UINT16_C( 11109), UINT16_C( 13125) } }, + { { UINT16_C( 14549), UINT16_C( 4486), UINT16_C( 13494), UINT16_C( 5232) }, + { UINT16_C( 144), UINT16_C( 10), UINT16_C( 198), UINT16_C( 165) }, + { UINT16_C( 28), UINT16_C( 10), UINT16_C( 42), UINT16_C( 25)}, + INT8_C( 1), + { UINT16_C( 13109), UINT16_C( 4386), UINT16_C( 11514), UINT16_C( 3582) } }, + }; + + simde_uint16x4_t r, a, b, v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_u16(test_vec[i].a); + b = simde_vld1_u16(test_vec[i].b); + v = simde_vld1_u16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_lane_u16(a, b, v, 0); 
break; + case 1: r = simde_vmls_lane_u16(a, b, v, 1); break; + case 2: r = simde_vmls_lane_u16(a, b, v, 2); break; + case 3: r = simde_vmls_lane_u16(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u16(0); break; + } + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t b[2]; + uint32_t v[4]; + int8_t lane; + uint32_t r[2]; + } test_vec[] = { + { { UINT32_C( 14144199), UINT32_C( 13604911) }, + { UINT32_C( 12510), UINT32_C( 6546) }, + { UINT32_C( 15820), UINT32_C( 9959), UINT32_C( 2597), UINT32_C( 2845)}, + INT8_C( 0), + { UINT32_C(4111203295), UINT32_C(4205014487) } }, + { { UINT32_C( 3498803), UINT32_C( 10348791) }, + { UINT32_C( 6329), UINT32_C( 17531) }, + { UINT32_C( 11854), UINT32_C( 6701), UINT32_C( 10772), UINT32_C( 17366)}, + INT8_C( 2), + { UINT32_C(4230290111), UINT32_C(4116472155) } }, + { { UINT32_C( 9344782), UINT32_C( 14659312) }, + { UINT32_C( 10437), UINT32_C( 10420) }, + { UINT32_C( 906), UINT32_C( 18644), UINT32_C( 16508), UINT32_C( 16678)}, + INT8_C( 0), + { UINT32_C(4294856156), UINT32_C( 5218792) } }, + { { UINT32_C( 13608515), UINT32_C( 3430158) }, + { UINT32_C( 13755), UINT32_C( 9868) }, + { UINT32_C( 7174), UINT32_C( 17760), UINT32_C( 16856), UINT32_C( 8296)}, + INT8_C( 0), + { UINT32_C(4209897441), UINT32_C(4227604422) } }, + { { UINT32_C( 1618132), UINT32_C( 12547889) }, + { UINT32_C( 8099), UINT32_C( 18860) }, + { UINT32_C( 1098), UINT32_C( 5482), UINT32_C( 8767), UINT32_C( 15482)}, + INT8_C( 2), + { UINT32_C(4225581495), UINT32_C(4142169565) } }, + { { UINT32_C( 13224238), UINT32_C( 6652430) }, + { UINT32_C( 6837), UINT32_C( 13996) }, + { UINT32_C( 138), UINT32_C( 4074), UINT32_C( 5577), UINT32_C( 9942)}, + INT8_C( 2), + { UINT32_C(4270061585), UINT32_C(4223564034) } }, + { { UINT32_C( 11742483), UINT32_C( 9802174) }, + { UINT32_C( 18818), UINT32_C( 6761) }, 
+ { UINT32_C( 2308), UINT32_C( 18630), UINT32_C( 9349), UINT32_C( 11315)}, + INT8_C( 0), + { UINT32_C(4263277835), UINT32_C(4289165082) } }, + { { UINT32_C( 16070524), UINT32_C( 18562582) }, + { UINT32_C( 8469), UINT32_C( 8470) }, + { UINT32_C( 19381), UINT32_C( 196), UINT32_C( 3731), UINT32_C( 11432)}, + INT8_C( 2), + { UINT32_C(4279439981), UINT32_C(4281928308) } }, + }; + + simde_uint32x2_t r, a, b; + simde_uint32x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_u32(test_vec[i].a); + b = simde_vld1_u32(test_vec[i].b); + v = simde_vld1q_u32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_laneq_u32(a, b, v, 0); break; + case 1: r = simde_vmls_laneq_u32(a, b, v, 1); break; + case 2: r = simde_vmls_laneq_u32(a, b, v, 2); break; + case 3: r = simde_vmls_laneq_u32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u32(0); break; + } + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmls_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t b[2]; + uint32_t v[2]; + int8_t lane; + uint32_t r[2]; + } test_vec[] = { + { { UINT32_C( 13216612), UINT32_C( 6916115) }, + { UINT32_C( 3793), UINT32_C( 5600) }, + { UINT32_C( 12665), UINT32_C( 1022)}, + INT8_C( 1), + { UINT32_C( 9340166), UINT32_C( 1192915) } }, + { { UINT32_C( 2375454), UINT32_C( 18597104) }, + { UINT32_C( 79), UINT32_C( 19053) }, + { UINT32_C( 13489), UINT32_C( 1823)}, + INT8_C( 0), + { UINT32_C( 1309823), UINT32_C(4056558483) } }, + { { UINT32_C( 9769189), UINT32_C( 5744419) }, + { UINT32_C( 4085), UINT32_C( 5904) }, + { UINT32_C( 10452), UINT32_C( 9697)}, + INT8_C( 1), + { UINT32_C(4265124240), UINT32_C(4243460627) } }, + { { UINT32_C( 7587765), UINT32_C( 17610709) }, + { UINT32_C( 11713), UINT32_C( 10443) }, + { UINT32_C( 4694), UINT32_C( 6989)}, + INT8_C( 1), + { UINT32_C(4220692904), UINT32_C(4239591878) } }, + { { UINT32_C( 19740570), UINT32_C( 
13143896) }, + { UINT32_C( 8559), UINT32_C( 3531) }, + { UINT32_C( 17972), UINT32_C( 6011)}, + INT8_C( 0), + { UINT32_C(4160885518), UINT32_C(4244652060) } }, + { { UINT32_C( 197694), UINT32_C( 8496100) }, + { UINT32_C( 12350), UINT32_C( 9487) }, + { UINT32_C( 10478), UINT32_C( 18312)}, + INT8_C( 1), + { UINT32_C(4069011790), UINT32_C(4129737452) } }, + { { UINT32_C( 14294595), UINT32_C( 19729101) }, + { UINT32_C( 9091), UINT32_C( 17352) }, + { UINT32_C( 14572), UINT32_C( 16640)}, + INT8_C( 1), + { UINT32_C(4157987651), UINT32_C(4025959117) } }, + { { UINT32_C( 14216084), UINT32_C( 10950830) }, + { UINT32_C( 10797), UINT32_C( 15180) }, + { UINT32_C( 1599), UINT32_C( 12973)}, + INT8_C( 1), + { UINT32_C(4169113899), UINT32_C(4108987986) } }, + }; + + simde_uint32x2_t r, a, b, v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1_u32(test_vec[i].a); + b = simde_vld1_u32(test_vec[i].b); + v = simde_vld1_u32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmls_lane_u32(a, b, v, 0); break; + case 1: r = simde_vmls_lane_u32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u32(0); break; + } + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[4]; + float b[4]; + float v[2]; + int8_t lane; + float r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 6491.42), SIMDE_FLOAT32_C( -8451.97), SIMDE_FLOAT32_C( 2635.60), SIMDE_FLOAT32_C( -5083.51) }, + { SIMDE_FLOAT32_C( 91.33), SIMDE_FLOAT32_C( 59.15), SIMDE_FLOAT32_C( -50.14), SIMDE_FLOAT32_C( 41.16) }, + { SIMDE_FLOAT32_C( 15.14), SIMDE_FLOAT32_C( 1.38)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( 5108.68), SIMDE_FLOAT32_C( -9347.50), SIMDE_FLOAT32_C( 3394.72), SIMDE_FLOAT32_C( -5706.67) } }, + { { SIMDE_FLOAT32_C( -7184.09), SIMDE_FLOAT32_C( -280.62), SIMDE_FLOAT32_C( 8124.68), SIMDE_FLOAT32_C( -6858.34) }, + { SIMDE_FLOAT32_C( 48.46), SIMDE_FLOAT32_C( 
90.92), SIMDE_FLOAT32_C( -96.60), SIMDE_FLOAT32_C( -81.19) }, + { SIMDE_FLOAT32_C( 67.28), SIMDE_FLOAT32_C( -71.80)}, + INT8_C( 0), + { SIMDE_FLOAT32_C(-10444.48), SIMDE_FLOAT32_C( -6397.72), SIMDE_FLOAT32_C( 14623.93), SIMDE_FLOAT32_C( -1395.88) } }, + { { SIMDE_FLOAT32_C( 335.77), SIMDE_FLOAT32_C( -4602.64), SIMDE_FLOAT32_C( 3583.71), SIMDE_FLOAT32_C( 6101.10) }, + { SIMDE_FLOAT32_C( -98.57), SIMDE_FLOAT32_C( -93.96), SIMDE_FLOAT32_C( -60.94), SIMDE_FLOAT32_C( 8.99) }, + { SIMDE_FLOAT32_C( 20.87), SIMDE_FLOAT32_C( 85.47)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 8760.55), SIMDE_FLOAT32_C( 3428.12), SIMDE_FLOAT32_C( 8792.25), SIMDE_FLOAT32_C( 5332.72) } }, + { { SIMDE_FLOAT32_C( -125.83), SIMDE_FLOAT32_C( 3874.30), SIMDE_FLOAT32_C( -9217.65), SIMDE_FLOAT32_C( -1981.56) }, + { SIMDE_FLOAT32_C( 7.03), SIMDE_FLOAT32_C( 22.24), SIMDE_FLOAT32_C( 49.28), SIMDE_FLOAT32_C( -99.57) }, + { SIMDE_FLOAT32_C( 40.30), SIMDE_FLOAT32_C( 41.81)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( -409.14), SIMDE_FLOAT32_C( 2978.03), SIMDE_FLOAT32_C(-11203.63), SIMDE_FLOAT32_C( 2031.11) } }, + { { SIMDE_FLOAT32_C( -1543.29), SIMDE_FLOAT32_C( 7159.22), SIMDE_FLOAT32_C( 6874.76), SIMDE_FLOAT32_C( 5014.46) }, + { SIMDE_FLOAT32_C( 37.12), SIMDE_FLOAT32_C( -51.42), SIMDE_FLOAT32_C( 46.07), SIMDE_FLOAT32_C( -64.25) }, + { SIMDE_FLOAT32_C( -87.56), SIMDE_FLOAT32_C( 54.83)}, + INT8_C( 0), + { SIMDE_FLOAT32_C( 1706.94), SIMDE_FLOAT32_C( 2656.88), SIMDE_FLOAT32_C( 10908.65), SIMDE_FLOAT32_C( -611.27) } }, + { { SIMDE_FLOAT32_C( 4071.42), SIMDE_FLOAT32_C( 3296.60), SIMDE_FLOAT32_C( 5251.51), SIMDE_FLOAT32_C( 9515.43) }, + { SIMDE_FLOAT32_C( -87.49), SIMDE_FLOAT32_C( 52.57), SIMDE_FLOAT32_C( -48.38), SIMDE_FLOAT32_C( -43.24) }, + { SIMDE_FLOAT32_C( -43.73), SIMDE_FLOAT32_C( -45.74)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 69.63), SIMDE_FLOAT32_C( 5701.15), SIMDE_FLOAT32_C( 3038.61), SIMDE_FLOAT32_C( 7537.63) } }, + { { SIMDE_FLOAT32_C( -1979.85), SIMDE_FLOAT32_C( 1741.08), SIMDE_FLOAT32_C( 6060.61), SIMDE_FLOAT32_C( 
8761.64) }, + { SIMDE_FLOAT32_C( 95.94), SIMDE_FLOAT32_C( 6.34), SIMDE_FLOAT32_C( 38.44), SIMDE_FLOAT32_C( 61.53) }, + { SIMDE_FLOAT32_C( 35.98), SIMDE_FLOAT32_C( 82.11)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( -9857.48), SIMDE_FLOAT32_C( 1220.50), SIMDE_FLOAT32_C( 2904.30), SIMDE_FLOAT32_C( 3709.41) } }, + { { SIMDE_FLOAT32_C( -4915.96), SIMDE_FLOAT32_C( 8376.01), SIMDE_FLOAT32_C( -5757.71), SIMDE_FLOAT32_C( 4008.47) }, + { SIMDE_FLOAT32_C( 59.56), SIMDE_FLOAT32_C( 63.00), SIMDE_FLOAT32_C( 22.15), SIMDE_FLOAT32_C( -38.60) }, + { SIMDE_FLOAT32_C( -8.68), SIMDE_FLOAT32_C( -8.36)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( -4418.04), SIMDE_FLOAT32_C( 8902.69), SIMDE_FLOAT32_C( -5572.54), SIMDE_FLOAT32_C( 3685.77) } }, + }; + + simde_float32x4_t r, a, b; + simde_float32x2_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1q_f32(test_vec[i].a); + b = simde_vld1q_f32(test_vec[i].b); + v = simde_vld1_f32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_lane_f32(a, b, v, 0); break; + case 1: r = simde_vmlsq_lane_f32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_f32(0); break; + } + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vmlsq_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b[8]; + int16_t v[4]; + int8_t lane; + int16_t r[8]; + } test_vec[] = { + { { -INT16_C( 5188), -INT16_C( 8438), -INT16_C( 5231), INT16_C( 7587), + INT16_C( 8978), INT16_C( 5785), INT16_C( 6040), -INT16_C( 2642) }, + { -INT16_C( 5), -INT16_C( 8), INT16_C( 70), -INT16_C( 19), + INT16_C( 56), -INT16_C( 58), INT16_C( 50), INT16_C( 13) }, + { -INT16_C( 1), INT16_C( 14), -INT16_C( 33), INT16_C( 63)}, + INT8_C( 1), + { -INT16_C( 5118), -INT16_C( 8326), -INT16_C( 6211), INT16_C( 7853), + INT16_C( 8194), INT16_C( 6597), INT16_C( 5340), -INT16_C( 2824) } }, + { { -INT16_C( 3763), -INT16_C( 2628), -INT16_C( 344), -INT16_C( 6423), + INT16_C( 756), 
INT16_C( 826), INT16_C( 6410), -INT16_C( 4702) }, + { INT16_C( 42), -INT16_C( 4), -INT16_C( 18), INT16_C( 78), + -INT16_C( 93), -INT16_C( 60), INT16_C( 76), INT16_C( 38) }, + { -INT16_C( 51), -INT16_C( 8), -INT16_C( 33), INT16_C( 42)}, + INT8_C( 3), + { -INT16_C( 5527), -INT16_C( 2460), INT16_C( 412), -INT16_C( 9699), + INT16_C( 4662), INT16_C( 3346), INT16_C( 3218), -INT16_C( 6298) } }, + { { INT16_C( 1804), -INT16_C( 8461), INT16_C( 7204), INT16_C( 4852), + INT16_C( 2225), INT16_C( 3888), INT16_C( 3305), -INT16_C( 4162) }, + { INT16_C( 6), -INT16_C( 47), -INT16_C( 39), -INT16_C( 73), + INT16_C( 65), INT16_C( 65), INT16_C( 3), INT16_C( 34) }, + { -INT16_C( 21), -INT16_C( 62), INT16_C( 26), -INT16_C( 99)}, + INT8_C( 2), + { INT16_C( 1648), -INT16_C( 7239), INT16_C( 8218), INT16_C( 6750), + INT16_C( 535), INT16_C( 2198), INT16_C( 3227), -INT16_C( 5046) } }, + { { -INT16_C( 7133), -INT16_C( 549), -INT16_C( 325), -INT16_C( 6686), + -INT16_C( 1679), -INT16_C( 7506), -INT16_C( 7943), INT16_C( 8844) }, + { -INT16_C( 45), -INT16_C( 81), INT16_C( 64), -INT16_C( 69), + INT16_C( 41), -INT16_C( 13), INT16_C( 74), -INT16_C( 73) }, + { -INT16_C( 91), INT16_C( 50), INT16_C( 13), -INT16_C( 18)}, + INT8_C( 2), + { -INT16_C( 6548), INT16_C( 504), -INT16_C( 1157), -INT16_C( 5789), + -INT16_C( 2212), -INT16_C( 7337), -INT16_C( 8905), INT16_C( 9793) } }, + { { -INT16_C( 4878), -INT16_C( 8602), -INT16_C( 8189), -INT16_C( 2681), + INT16_C( 7338), -INT16_C( 3498), -INT16_C( 8694), -INT16_C( 671) }, + { INT16_C( 51), -INT16_C( 60), -INT16_C( 62), INT16_C( 21), + INT16_C( 75), -INT16_C( 18), INT16_C( 30), INT16_C( 70) }, + { INT16_C( 83), INT16_C( 5), INT16_C( 98), INT16_C( 90)}, + INT8_C( 0), + { -INT16_C( 9111), -INT16_C( 3622), -INT16_C( 3043), -INT16_C( 4424), + INT16_C( 1113), -INT16_C( 2004), -INT16_C( 11184), -INT16_C( 6481) } }, + { { INT16_C( 5619), -INT16_C( 6470), INT16_C( 7000), INT16_C( 5919), + -INT16_C( 1294), INT16_C( 8602), INT16_C( 5031), -INT16_C( 345) }, + { INT16_C( 
54), -INT16_C( 39), INT16_C( 73), -INT16_C( 31), + INT16_C( 92), INT16_C( 68), INT16_C( 88), -INT16_C( 31) }, + { -INT16_C( 29), -INT16_C( 82), INT16_C( 90), -INT16_C( 54)}, + INT8_C( 1), + { INT16_C( 10047), -INT16_C( 9668), INT16_C( 12986), INT16_C( 3377), + INT16_C( 6250), INT16_C( 14178), INT16_C( 12247), -INT16_C( 2887) } }, + { { -INT16_C( 7748), -INT16_C( 1274), INT16_C( 3421), -INT16_C( 4248), + INT16_C( 4815), INT16_C( 4796), -INT16_C( 1528), -INT16_C( 2191) }, + { -INT16_C( 15), INT16_C( 17), INT16_C( 35), -INT16_C( 26), + INT16_C( 16), INT16_C( 54), -INT16_C( 24), -INT16_C( 13) }, + { -INT16_C( 67), -INT16_C( 16), INT16_C( 34), -INT16_C( 37)}, + INT8_C( 3), + { -INT16_C( 8303), -INT16_C( 645), INT16_C( 4716), -INT16_C( 5210), + INT16_C( 5407), INT16_C( 6794), -INT16_C( 2416), -INT16_C( 2672) } }, + { { INT16_C( 8579), -INT16_C( 2628), -INT16_C( 3924), INT16_C( 7591), + -INT16_C( 6125), INT16_C( 1568), -INT16_C( 7313), -INT16_C( 8291) }, + { INT16_C( 86), INT16_C( 13), -INT16_C( 67), -INT16_C( 1), + INT16_C( 22), INT16_C( 30), -INT16_C( 80), -INT16_C( 69) }, + { -INT16_C( 46), -INT16_C( 41), -INT16_C( 57), -INT16_C( 70)}, + INT8_C( 0), + { INT16_C( 12535), -INT16_C( 2030), -INT16_C( 7006), INT16_C( 7545), + -INT16_C( 5113), INT16_C( 2948), -INT16_C( 10993), -INT16_C( 11465) } }, + }; + + simde_int16x8_t r, a, b; + simde_int16x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1q_s16(test_vec[i].a); + b = simde_vld1q_s16(test_vec[i].b); + v = simde_vld1_s16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_lane_s16(a, b, v, 0); break; + case 1: r = simde_vmlsq_lane_s16(a, b, v, 1); break; + case 2: r = simde_vmlsq_lane_s16(a, b, v, 2); break; + case 3: r = simde_vmlsq_lane_s16(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s16(0); break; + } + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + 
static const struct { + int32_t a[4]; + int32_t b[4]; + int32_t v[2]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 2067537), -INT32_C( 1787354), INT32_C( 759280), INT32_C( 795779) }, + { -INT32_C( 7988), INT32_C( 2018), -INT32_C( 2719), -INT32_C( 7388) }, + { -INT32_C( 6378), INT32_C( 8353)}, + INT8_C( 1), + { INT32_C( 68791301), -INT32_C( 18643708), INT32_C( 23471087), INT32_C( 62507743) } }, + { { INT32_C( 7988559), -INT32_C( 6030795), INT32_C( 5763101), INT32_C( 9307978) }, + { INT32_C( 2284), -INT32_C( 2903), -INT32_C( 6517), -INT32_C( 4591) }, + { INT32_C( 7512), INT32_C( 2307)}, + INT8_C( 1), + { INT32_C( 2719371), INT32_C( 666426), INT32_C( 20797820), INT32_C( 19899415) } }, + { { INT32_C( 4554261), INT32_C( 1170211), -INT32_C( 3300171), -INT32_C( 5880130) }, + { INT32_C( 649), INT32_C( 1731), INT32_C( 1619), -INT32_C( 7917) }, + { -INT32_C( 3541), -INT32_C( 447)}, + INT8_C( 1), + { INT32_C( 4844364), INT32_C( 1943968), -INT32_C( 2576478), -INT32_C( 9419029) } }, + { { -INT32_C( 4974825), -INT32_C( 2714283), INT32_C( 8561422), -INT32_C( 9197423) }, + { INT32_C( 8661), -INT32_C( 7895), -INT32_C( 9028), -INT32_C( 6302) }, + { INT32_C( 443), -INT32_C( 8284)}, + INT8_C( 1), + { INT32_C( 66772899), -INT32_C( 68116463), -INT32_C( 66226530), -INT32_C( 61403191) } }, + { { -INT32_C( 4166337), -INT32_C( 3164084), -INT32_C( 8639249), -INT32_C( 2650523) }, + { -INT32_C( 5405), INT32_C( 3318), INT32_C( 9093), -INT32_C( 225) }, + { -INT32_C( 9450), INT32_C( 5112)}, + INT8_C( 0), + { -INT32_C( 55243587), INT32_C( 28191016), INT32_C( 77289601), -INT32_C( 4776773) } }, + { { -INT32_C( 7102308), -INT32_C( 7309928), INT32_C( 805822), INT32_C( 3308797) }, + { INT32_C( 8319), INT32_C( 7694), -INT32_C( 6392), INT32_C( 6437) }, + { INT32_C( 2166), -INT32_C( 6721)}, + INT8_C( 1), + { INT32_C( 48809691), INT32_C( 44401446), -INT32_C( 42154810), INT32_C( 46571874) } }, + { { INT32_C( 9895003), -INT32_C( 4484512), -INT32_C( 6521721), INT32_C( 5731200) }, + { 
-INT32_C( 9410), INT32_C( 2568), -INT32_C( 9305), INT32_C( 8401) }, + { INT32_C( 2689), INT32_C( 267)}, + INT8_C( 0), + { INT32_C( 35198493), -INT32_C( 11389864), INT32_C( 18499424), -INT32_C( 16859089) } }, + { { INT32_C( 4672068), INT32_C( 1710369), INT32_C( 3119765), -INT32_C( 5266593) }, + { INT32_C( 2214), -INT32_C( 5836), -INT32_C( 8206), -INT32_C( 787) }, + { -INT32_C( 841), INT32_C( 4393)}, + INT8_C( 0), + { INT32_C( 6534042), -INT32_C( 3197707), -INT32_C( 3781481), -INT32_C( 5928460) } }, + }; + + simde_int32x4_t r, a, b; + simde_int32x2_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1q_s32(test_vec[i].a); + b = simde_vld1q_s32(test_vec[i].b); + v = simde_vld1_s32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_lane_s32(a, b, v, 0); break; + case 1: r = simde_vmlsq_lane_s32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint16_t v[4]; + int8_t lane; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 17838), UINT16_C( 13174), UINT16_C( 11119), UINT16_C( 8705), + UINT16_C( 11723), UINT16_C( 17154), UINT16_C( 8765), UINT16_C( 16080) }, + { UINT16_C( 128), UINT16_C( 5), UINT16_C( 92), UINT16_C( 150), + UINT16_C( 146), UINT16_C( 164), UINT16_C( 83), UINT16_C( 118) }, + { UINT16_C( 173), UINT16_C( 106), UINT16_C( 70), UINT16_C( 166)}, + INT8_C( 0), + { UINT16_C( 61230), UINT16_C( 12309), UINT16_C( 60739), UINT16_C( 48291), + UINT16_C( 52001), UINT16_C( 54318), UINT16_C( 59942), UINT16_C( 61202) } }, + { { UINT16_C( 16451), UINT16_C( 4363), UINT16_C( 18275), UINT16_C( 16052), + UINT16_C( 15197), UINT16_C( 4998), UINT16_C( 15634), UINT16_C( 323) }, + { UINT16_C( 146), UINT16_C( 102), UINT16_C( 124), UINT16_C( 25), + UINT16_C( 181), UINT16_C( 21), UINT16_C( 162), 
UINT16_C( 88) }, + { UINT16_C( 90), UINT16_C( 149), UINT16_C( 114), UINT16_C( 74)}, + INT8_C( 3), + { UINT16_C( 5647), UINT16_C( 62351), UINT16_C( 9099), UINT16_C( 14202), + UINT16_C( 1803), UINT16_C( 3444), UINT16_C( 3646), UINT16_C( 59347) } }, + { { UINT16_C( 8467), UINT16_C( 10164), UINT16_C( 9025), UINT16_C( 5585), + UINT16_C( 3018), UINT16_C( 456), UINT16_C( 14551), UINT16_C( 1080) }, + { UINT16_C( 170), UINT16_C( 112), UINT16_C( 127), UINT16_C( 129), + UINT16_C( 32), UINT16_C( 42), UINT16_C( 191), UINT16_C( 144) }, + { UINT16_C( 18), UINT16_C( 181), UINT16_C( 100), UINT16_C( 0)}, + INT8_C( 2), + { UINT16_C( 57003), UINT16_C( 64500), UINT16_C( 61861), UINT16_C( 58221), + UINT16_C( 65354), UINT16_C( 61792), UINT16_C( 60987), UINT16_C( 52216) } }, + { { UINT16_C( 17497), UINT16_C( 1121), UINT16_C( 1811), UINT16_C( 10768), + UINT16_C( 5152), UINT16_C( 5731), UINT16_C( 3815), UINT16_C( 1174) }, + { UINT16_C( 154), UINT16_C( 5), UINT16_C( 122), UINT16_C( 97), + UINT16_C( 55), UINT16_C( 136), UINT16_C( 169), UINT16_C( 179) }, + { UINT16_C( 48), UINT16_C( 123), UINT16_C( 12), UINT16_C( 174)}, + INT8_C( 2), + { UINT16_C( 15649), UINT16_C( 1061), UINT16_C( 347), UINT16_C( 9604), + UINT16_C( 4492), UINT16_C( 4099), UINT16_C( 1787), UINT16_C( 64562) } }, + { { UINT16_C( 16583), UINT16_C( 5252), UINT16_C( 7490), UINT16_C( 3819), + UINT16_C( 6483), UINT16_C( 9245), UINT16_C( 455), UINT16_C( 5637) }, + { UINT16_C( 96), UINT16_C( 184), UINT16_C( 42), UINT16_C( 35), + UINT16_C( 59), UINT16_C( 103), UINT16_C( 163), UINT16_C( 139) }, + { UINT16_C( 142), UINT16_C( 105), UINT16_C( 37), UINT16_C( 39)}, + INT8_C( 2), + { UINT16_C( 13031), UINT16_C( 63980), UINT16_C( 5936), UINT16_C( 2524), + UINT16_C( 4300), UINT16_C( 5434), UINT16_C( 59960), UINT16_C( 494) } }, + { { UINT16_C( 6473), UINT16_C( 18112), UINT16_C( 4323), UINT16_C( 18260), + UINT16_C( 3172), UINT16_C( 7449), UINT16_C( 4168), UINT16_C( 15557) }, + { UINT16_C( 130), UINT16_C( 80), UINT16_C( 190), UINT16_C( 5), + 
UINT16_C( 35), UINT16_C( 197), UINT16_C( 134), UINT16_C( 124) }, + { UINT16_C( 75), UINT16_C( 56), UINT16_C( 113), UINT16_C( 90)}, + INT8_C( 1), + { UINT16_C( 64729), UINT16_C( 13632), UINT16_C( 59219), UINT16_C( 17980), + UINT16_C( 1212), UINT16_C( 61953), UINT16_C( 62200), UINT16_C( 8613) } }, + { { UINT16_C( 6128), UINT16_C( 12118), UINT16_C( 1671), UINT16_C( 3331), + UINT16_C( 10416), UINT16_C( 13204), UINT16_C( 12779), UINT16_C( 9238) }, + { UINT16_C( 176), UINT16_C( 149), UINT16_C( 121), UINT16_C( 79), + UINT16_C( 122), UINT16_C( 96), UINT16_C( 136), UINT16_C( 81) }, + { UINT16_C( 144), UINT16_C( 74), UINT16_C( 30), UINT16_C( 104)}, + INT8_C( 0), + { UINT16_C( 46320), UINT16_C( 56198), UINT16_C( 49783), UINT16_C( 57491), + UINT16_C( 58384), UINT16_C( 64916), UINT16_C( 58731), UINT16_C( 63110) } }, + { { UINT16_C( 92), UINT16_C( 7629), UINT16_C( 19152), UINT16_C( 332), + UINT16_C( 1926), UINT16_C( 8952), UINT16_C( 3219), UINT16_C( 16176) }, + { UINT16_C( 105), UINT16_C( 80), UINT16_C( 192), UINT16_C( 169), + UINT16_C( 149), UINT16_C( 96), UINT16_C( 195), UINT16_C( 135) }, + { UINT16_C( 35), UINT16_C( 132), UINT16_C( 55), UINT16_C( 80)}, + INT8_C( 0), + { UINT16_C( 61953), UINT16_C( 4829), UINT16_C( 12432), UINT16_C( 59953), + UINT16_C( 62247), UINT16_C( 5592), UINT16_C( 61930), UINT16_C( 11451) } }, + }; + + simde_uint16x8_t r, a, b; + simde_uint16x4_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1q_u16(test_vec[i].a); + b = simde_vld1q_u16(test_vec[i].b); + v = simde_vld1_u16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_lane_u16(a, b, v, 0); break; + case 1: r = simde_vmlsq_lane_u16(a, b, v, 1); break; + case 2: r = simde_vmlsq_lane_u16(a, b, v, 2); break; + case 3: r = simde_vmlsq_lane_u16(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u16(0); break; + } + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_lane_u32 
(SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint32_t v[2]; + int8_t lane; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 15791685), UINT32_C( 5492798), UINT32_C( 10259354), UINT32_C( 16700343) }, + { UINT32_C( 6663), UINT32_C( 2022), UINT32_C( 3324), UINT32_C( 15952) }, + { UINT32_C( 4043), UINT32_C( 15374)}, + INT8_C( 0), + { UINT32_C(4283820472), UINT32_C(4292285148), UINT32_C(4291787718), UINT32_C(4247173703) } }, + { { UINT32_C( 15645803), UINT32_C( 6441411), UINT32_C( 15321628), UINT32_C( 16904164) }, + { UINT32_C( 18312), UINT32_C( 14372), UINT32_C( 15285), UINT32_C( 2467) }, + { UINT32_C( 17867), UINT32_C( 14975)}, + INT8_C( 1), + { UINT32_C(4036390899), UINT32_C(4086188007), UINT32_C(4081396049), UINT32_C(4274928135) } }, + { { UINT32_C( 6161128), UINT32_C( 11966727), UINT32_C( 2995670), UINT32_C( 9425172) }, + { UINT32_C( 11544), UINT32_C( 5384), UINT32_C( 12464), UINT32_C( 19073) }, + { UINT32_C( 12943), UINT32_C( 7149)}, + INT8_C( 1), + { UINT32_C(4218600368), UINT32_C(4268443807), UINT32_C(4208857830), UINT32_C(4168039591) } }, + { { UINT32_C( 9938231), UINT32_C( 14165811), UINT32_C( 9936283), UINT32_C( 9236126) }, + { UINT32_C( 5950), UINT32_C( 7535), UINT32_C( 2047), UINT32_C( 9964) }, + { UINT32_C( 18779), UINT32_C( 12047)}, + INT8_C( 1), + { UINT32_C(4233225877), UINT32_C(4218358962), UINT32_C(4280243370), UINT32_C(4184167114) } }, + { { UINT32_C( 18599053), UINT32_C( 19007241), UINT32_C( 18733173), UINT32_C( 17239616) }, + { UINT32_C( 4951), UINT32_C( 10742), UINT32_C( 16153), UINT32_C( 7316) }, + { UINT32_C( 2694), UINT32_C( 19335)}, + INT8_C( 0), + { UINT32_C( 5261059), UINT32_C(4285035589), UINT32_C(4270184287), UINT32_C(4292497608) } }, + { { UINT32_C( 10369957), UINT32_C( 11318872), UINT32_C( 5403206), UINT32_C( 14029352) }, + { UINT32_C( 12645), UINT32_C( 7719), UINT32_C( 12943), UINT32_C( 11295) }, + { UINT32_C( 17288), UINT32_C( 16731)}, + INT8_C( 0), + { UINT32_C(4086730493), 
UINT32_C(4172840096), UINT32_C(4076611918), UINT32_C(4113728688) } }, + { { UINT32_C( 7181617), UINT32_C( 19018654), UINT32_C( 8564072), UINT32_C( 12618372) }, + { UINT32_C( 1475), UINT32_C( 5581), UINT32_C( 16355), UINT32_C( 5170) }, + { UINT32_C( 11129), UINT32_C( 4357)}, + INT8_C( 0), + { UINT32_C(4285733638), UINT32_C(4251875001), UINT32_C(4121516573), UINT32_C(4250048738) } }, + { { UINT32_C( 17863140), UINT32_C( 4138048), UINT32_C( 5890744), UINT32_C( 3960839) }, + { UINT32_C( 7857), UINT32_C( 17458), UINT32_C( 15399), UINT32_C( 19704) }, + { UINT32_C( 13678), UINT32_C( 11220)}, + INT8_C( 1), + { UINT32_C(4224674896), UINT32_C(4103226584), UINT32_C(4128081260), UINT32_C(4077849255) } }, + }; + + simde_uint32x4_t r, a, b; + simde_uint32x2_t v; + + for (int i = 0 ; i < 8 ; i++) { + a = simde_vld1q_u32(test_vec[i].a); + b = simde_vld1q_u32(test_vec[i].b); + v = simde_vld1_u32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_lane_u32(a, b, v, 0); break; + case 1: r = simde_vmlsq_lane_u32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u32(0); break; + } + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + float a[4]; + float b[4]; + float v[4]; + int8_t lane; + float r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -6762.79), SIMDE_FLOAT32_C( 499.13), SIMDE_FLOAT32_C( -6587.10), SIMDE_FLOAT32_C( 2471.72) }, + { SIMDE_FLOAT32_C( 28.52), SIMDE_FLOAT32_C( 44.06), SIMDE_FLOAT32_C( 72.56), SIMDE_FLOAT32_C( 95.83) }, + { SIMDE_FLOAT32_C( -78.53), SIMDE_FLOAT32_C( 73.13), SIMDE_FLOAT32_C( -59.69), SIMDE_FLOAT32_C( 1.53)}, + INT8_C( 2), + { SIMDE_FLOAT32_C( -5060.43), SIMDE_FLOAT32_C( 3129.07), SIMDE_FLOAT32_C( -2255.99), SIMDE_FLOAT32_C( 8191.81) } }, + { { SIMDE_FLOAT32_C( 6887.56), SIMDE_FLOAT32_C( 1557.40), SIMDE_FLOAT32_C( 798.28), SIMDE_FLOAT32_C( 411.12) }, + { SIMDE_FLOAT32_C( 
-21.25), SIMDE_FLOAT32_C( -33.97), SIMDE_FLOAT32_C( 43.09), SIMDE_FLOAT32_C( 31.89) }, + { SIMDE_FLOAT32_C( -84.05), SIMDE_FLOAT32_C( 40.51), SIMDE_FLOAT32_C( 54.72), SIMDE_FLOAT32_C( 53.52)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 7748.40), SIMDE_FLOAT32_C( 2933.52), SIMDE_FLOAT32_C( -947.30), SIMDE_FLOAT32_C( -880.74) } }, + { { SIMDE_FLOAT32_C( 2373.31), SIMDE_FLOAT32_C( 1066.86), SIMDE_FLOAT32_C( -1224.48), SIMDE_FLOAT32_C( 9197.43) }, + { SIMDE_FLOAT32_C( -78.15), SIMDE_FLOAT32_C( 31.82), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 35.15) }, + { SIMDE_FLOAT32_C( 46.27), SIMDE_FLOAT32_C( -65.33), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -2.95)}, + INT8_C( 2), + { SIMDE_FLOAT32_C( 2750.77), SIMDE_FLOAT32_C( 913.17), SIMDE_FLOAT32_C( -1132.32), SIMDE_FLOAT32_C( 9027.66) } }, + { { SIMDE_FLOAT32_C( -1918.81), SIMDE_FLOAT32_C( 5219.58), SIMDE_FLOAT32_C( 2597.27), SIMDE_FLOAT32_C( 8829.63) }, + { SIMDE_FLOAT32_C( 65.58), SIMDE_FLOAT32_C( -98.35), SIMDE_FLOAT32_C( 63.35), SIMDE_FLOAT32_C( -73.00) }, + { SIMDE_FLOAT32_C( -60.40), SIMDE_FLOAT32_C( 78.91), SIMDE_FLOAT32_C( -70.88), SIMDE_FLOAT32_C( -96.15)}, + INT8_C( 3), + { SIMDE_FLOAT32_C( 4386.71), SIMDE_FLOAT32_C( -4236.77), SIMDE_FLOAT32_C( 8688.37), SIMDE_FLOAT32_C( 1810.68) } }, + { { SIMDE_FLOAT32_C( 3167.82), SIMDE_FLOAT32_C( 7995.66), SIMDE_FLOAT32_C( 728.04), SIMDE_FLOAT32_C( -3484.87) }, + { SIMDE_FLOAT32_C( -92.09), SIMDE_FLOAT32_C( 7.83), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -33.98) }, + { SIMDE_FLOAT32_C( -73.28), SIMDE_FLOAT32_C( 91.58), SIMDE_FLOAT32_C( -70.19), SIMDE_FLOAT32_C( 67.29)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( 11601.42), SIMDE_FLOAT32_C( 7278.59), SIMDE_FLOAT32_C( 757.35), SIMDE_FLOAT32_C( -372.98) } }, + { { SIMDE_FLOAT32_C( 4508.62), SIMDE_FLOAT32_C( -9206.33), SIMDE_FLOAT32_C( 8790.14), SIMDE_FLOAT32_C( -3027.85) }, + { SIMDE_FLOAT32_C( 60.34), SIMDE_FLOAT32_C( -64.83), SIMDE_FLOAT32_C( -9.37), SIMDE_FLOAT32_C( 61.16) }, + { SIMDE_FLOAT32_C( -66.51), SIMDE_FLOAT32_C( 81.52), 
SIMDE_FLOAT32_C( -64.63), SIMDE_FLOAT32_C( -25.06)}, + INT8_C( 1), + { SIMDE_FLOAT32_C( -410.30), SIMDE_FLOAT32_C( -3921.39), SIMDE_FLOAT32_C( 9553.98), SIMDE_FLOAT32_C( -8013.61) } }, + { { SIMDE_FLOAT32_C( -4120.81), SIMDE_FLOAT32_C( -7964.12), SIMDE_FLOAT32_C( -1354.29), SIMDE_FLOAT32_C( -8729.84) }, + { SIMDE_FLOAT32_C( -55.62), SIMDE_FLOAT32_C( -2.28), SIMDE_FLOAT32_C( 46.43), SIMDE_FLOAT32_C( -31.88) }, + { SIMDE_FLOAT32_C( 70.07), SIMDE_FLOAT32_C( 5.72), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -61.90)}, + INT8_C( 3), + { SIMDE_FLOAT32_C( -7563.69), SIMDE_FLOAT32_C( -8105.25), SIMDE_FLOAT32_C( 1519.73), SIMDE_FLOAT32_C(-10703.21) } }, + { { SIMDE_FLOAT32_C( -5959.85), SIMDE_FLOAT32_C( -9679.86), SIMDE_FLOAT32_C( -7706.12), SIMDE_FLOAT32_C( 3826.01) }, + { SIMDE_FLOAT32_C( -17.91), SIMDE_FLOAT32_C( -91.17), SIMDE_FLOAT32_C( -19.92), SIMDE_FLOAT32_C( 53.18) }, + { SIMDE_FLOAT32_C( -90.02), SIMDE_FLOAT32_C( -37.21), SIMDE_FLOAT32_C( 45.50), SIMDE_FLOAT32_C( -78.18)}, + INT8_C( 2), + { SIMDE_FLOAT32_C( -5144.95), SIMDE_FLOAT32_C( -5531.63), SIMDE_FLOAT32_C( -6799.76), SIMDE_FLOAT32_C( 1406.32) } }, + }; + + simde_float32x4_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_f32(test_vec[i].a); + b = simde_vld1q_f32(test_vec[i].b); + v = simde_vld1q_f32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_laneq_f32(a, b, v, 0); break; + case 1: r = simde_vmlsq_laneq_f32(a, b, v, 1); break; + case 2: r = simde_vmlsq_laneq_f32(a, b, v, 2); break; + case 3: r = simde_vmlsq_laneq_f32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_f32(0); break; + } + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vmlsq_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b[8]; + int16_t v[8]; + int8_t lane; + int16_t r[8]; + } test_vec[] = { + { { -INT16_C( 5645), INT16_C( 
6126), INT16_C( 1606), INT16_C( 419), + -INT16_C( 6707), INT16_C( 7665), INT16_C( 938), -INT16_C( 1109) }, + { -INT16_C( 3), INT16_C( 84), INT16_C( 61), INT16_C( 65), + INT16_C( 13), -INT16_C( 33), -INT16_C( 3), -INT16_C( 52) }, + { -INT16_C( 38), INT16_C( 20), INT16_C( 21), INT16_C( 70), + -INT16_C( 84), -INT16_C( 76), INT16_C( 22), INT16_C( 21)}, + INT8_C( 1), + { -INT16_C( 5585), INT16_C( 4446), INT16_C( 386), -INT16_C( 881), + -INT16_C( 6967), INT16_C( 8325), INT16_C( 998), -INT16_C( 69) } }, + { { INT16_C( 5485), -INT16_C( 1448), INT16_C( 4322), INT16_C( 2394), + -INT16_C( 8117), INT16_C( 7330), -INT16_C( 550), -INT16_C( 6344) }, + { INT16_C( 28), -INT16_C( 4), INT16_C( 50), INT16_C( 90), + INT16_C( 82), -INT16_C( 61), INT16_C( 7), INT16_C( 35) }, + { -INT16_C( 77), -INT16_C( 91), INT16_C( 58), -INT16_C( 97), + INT16_C( 7), INT16_C( 38), -INT16_C( 21), -INT16_C( 86)}, + INT8_C( 5), + { INT16_C( 4421), -INT16_C( 1296), INT16_C( 2422), -INT16_C( 1026), + -INT16_C( 11233), INT16_C( 9648), -INT16_C( 816), -INT16_C( 7674) } }, + { { INT16_C( 7671), INT16_C( 3232), -INT16_C( 6536), -INT16_C( 4163), + -INT16_C( 2780), INT16_C( 2784), INT16_C( 2806), INT16_C( 1950) }, + { -INT16_C( 88), -INT16_C( 97), INT16_C( 69), -INT16_C( 76), + -INT16_C( 43), INT16_C( 47), INT16_C( 93), -INT16_C( 43) }, + { -INT16_C( 50), INT16_C( 30), INT16_C( 43), INT16_C( 22), + INT16_C( 15), -INT16_C( 84), -INT16_C( 38), -INT16_C( 55)}, + INT8_C( 1), + { INT16_C( 10311), INT16_C( 6142), -INT16_C( 8606), -INT16_C( 1883), + -INT16_C( 1490), INT16_C( 1374), INT16_C( 16), INT16_C( 3240) } }, + { { INT16_C( 2979), -INT16_C( 2417), -INT16_C( 7237), INT16_C( 5546), + INT16_C( 3240), -INT16_C( 9983), INT16_C( 2515), -INT16_C( 7398) }, + { INT16_C( 4), -INT16_C( 71), -INT16_C( 87), -INT16_C( 85), + INT16_C( 34), INT16_C( 33), -INT16_C( 99), INT16_C( 89) }, + { INT16_C( 80), -INT16_C( 55), INT16_C( 13), INT16_C( 57), + -INT16_C( 87), -INT16_C( 49), -INT16_C( 42), INT16_C( 1)}, + INT8_C( 4), + { INT16_C( 
3327), -INT16_C( 8594), -INT16_C( 14806), -INT16_C( 1849), + INT16_C( 6198), -INT16_C( 7112), -INT16_C( 6098), INT16_C( 345) } }, + { { -INT16_C( 6353), INT16_C( 885), INT16_C( 8828), -INT16_C( 7651), + -INT16_C( 9996), INT16_C( 55), -INT16_C( 6512), -INT16_C( 8877) }, + { -INT16_C( 54), INT16_C( 12), -INT16_C( 76), -INT16_C( 30), + INT16_C( 96), INT16_C( 1), -INT16_C( 55), -INT16_C( 39) }, + { INT16_C( 52), -INT16_C( 2), -INT16_C( 84), -INT16_C( 89), + -INT16_C( 37), -INT16_C( 75), -INT16_C( 69), -INT16_C( 48)}, + INT8_C( 4), + { -INT16_C( 8351), INT16_C( 1329), INT16_C( 6016), -INT16_C( 8761), + -INT16_C( 6444), INT16_C( 92), -INT16_C( 8547), -INT16_C( 10320) } }, + { { INT16_C( 4466), -INT16_C( 8522), -INT16_C( 5854), -INT16_C( 8678), + INT16_C( 887), -INT16_C( 988), INT16_C( 7415), -INT16_C( 8781) }, + { -INT16_C( 62), -INT16_C( 93), INT16_C( 29), INT16_C( 6), + INT16_C( 13), -INT16_C( 42), INT16_C( 62), -INT16_C( 55) }, + { -INT16_C( 45), -INT16_C( 34), -INT16_C( 92), -INT16_C( 32), + -INT16_C( 6), INT16_C( 20), INT16_C( 34), -INT16_C( 60)}, + INT8_C( 4), + { INT16_C( 4094), -INT16_C( 9080), -INT16_C( 5680), -INT16_C( 8642), + INT16_C( 965), -INT16_C( 1240), INT16_C( 7787), -INT16_C( 9111) } }, + { { -INT16_C( 7753), -INT16_C( 8668), -INT16_C( 3870), INT16_C( 2230), + INT16_C( 3263), -INT16_C( 5559), INT16_C( 2354), INT16_C( 3199) }, + { -INT16_C( 23), -INT16_C( 88), INT16_C( 93), -INT16_C( 81), + -INT16_C( 53), INT16_C( 43), INT16_C( 23), INT16_C( 59) }, + { -INT16_C( 99), INT16_C( 44), -INT16_C( 81), -INT16_C( 50), + -INT16_C( 52), INT16_C( 83), -INT16_C( 4), INT16_C( 29)}, + INT8_C( 6), + { -INT16_C( 7845), -INT16_C( 9020), -INT16_C( 3498), INT16_C( 1906), + INT16_C( 3051), -INT16_C( 5387), INT16_C( 2446), INT16_C( 3435) } }, + { { -INT16_C( 9513), -INT16_C( 4391), INT16_C( 6919), INT16_C( 1757), + INT16_C( 9022), -INT16_C( 9450), INT16_C( 4930), -INT16_C( 9686) }, + { INT16_C( 51), INT16_C( 13), -INT16_C( 8), INT16_C( 87), + -INT16_C( 72), INT16_C( 14), 
-INT16_C( 5), -INT16_C( 29) }, + { -INT16_C( 92), INT16_C( 88), INT16_C( 20), -INT16_C( 56), + -INT16_C( 77), INT16_C( 17), INT16_C( 73), -INT16_C( 30)}, + INT8_C( 0), + { -INT16_C( 4821), -INT16_C( 3195), INT16_C( 6183), INT16_C( 9761), + INT16_C( 2398), -INT16_C( 8162), INT16_C( 4470), -INT16_C( 12354) } }, + }; + + simde_int16x8_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_s16(test_vec[i].a); + b = simde_vld1q_s16(test_vec[i].b); + v = simde_vld1q_s16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vmlsq_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vmlsq_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vmlsq_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vmlsq_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vmlsq_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vmlsq_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vmlsq_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s16(0); break; + } + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t b[4]; + int32_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 7266304), INT32_C( 8765414), -INT32_C( 7324637), -INT32_C( 3796132) }, + { INT32_C( 2916), -INT32_C( 9685), -INT32_C( 3851), INT32_C( 5537) }, + { INT32_C( 6312), -INT32_C( 2907), -INT32_C( 5037), INT32_C( 438)}, + INT8_C( 1), + { INT32_C( 1210508), -INT32_C( 19388881), -INT32_C( 18519494), INT32_C( 12299927) } }, + { { -INT32_C( 2085611), INT32_C( 8518775), -INT32_C( 2401017), -INT32_C( 7493078) }, + { -INT32_C( 850), -INT32_C( 102), INT32_C( 2467), INT32_C( 6004) }, + { -INT32_C( 2910), INT32_C( 4501), INT32_C( 8068), INT32_C( 2494)}, + INT8_C( 2), + { INT32_C( 4772189), INT32_C( 9341711), -INT32_C( 
22304773), -INT32_C( 55933350) } }, + { { INT32_C( 9795531), -INT32_C( 1489691), INT32_C( 6104423), -INT32_C( 6800355) }, + { INT32_C( 4492), INT32_C( 7544), -INT32_C( 3632), INT32_C( 7019) }, + { INT32_C( 4948), INT32_C( 2073), INT32_C( 258), -INT32_C( 9573)}, + INT8_C( 2), + { INT32_C( 8636595), -INT32_C( 3436043), INT32_C( 7041479), -INT32_C( 8611257) } }, + { { -INT32_C( 3176719), -INT32_C( 2766058), -INT32_C( 1515092), INT32_C( 9841774) }, + { -INT32_C( 5232), INT32_C( 9968), INT32_C( 8457), INT32_C( 2722) }, + { -INT32_C( 1468), -INT32_C( 90), -INT32_C( 6146), INT32_C( 7313)}, + INT8_C( 2), + { -INT32_C( 35332591), INT32_C( 58497270), INT32_C( 50461630), INT32_C( 26571186) } }, + { { -INT32_C( 1959870), -INT32_C( 1993), INT32_C( 7064633), INT32_C( 5068070) }, + { INT32_C( 6246), -INT32_C( 2027), INT32_C( 1778), -INT32_C( 9217) }, + { -INT32_C( 4519), INT32_C( 3833), -INT32_C( 3138), INT32_C( 1701)}, + INT8_C( 2), + { INT32_C( 17640078), -INT32_C( 6362719), INT32_C( 12643997), -INT32_C( 23854876) } }, + { { INT32_C( 8343862), INT32_C( 6370160), INT32_C( 5836424), INT32_C( 2038678) }, + { INT32_C( 1733), -INT32_C( 8869), INT32_C( 9296), -INT32_C( 9973) }, + { -INT32_C( 9543), -INT32_C( 7415), INT32_C( 5411), -INT32_C( 481)}, + INT8_C( 1), + { INT32_C( 21194057), -INT32_C( 59393475), INT32_C( 74766264), -INT32_C( 71911117) } }, + { { INT32_C( 2535276), -INT32_C( 3126396), -INT32_C( 5814968), INT32_C( 1121683) }, + { INT32_C( 7388), INT32_C( 2655), INT32_C( 3990), INT32_C( 3338) }, + { INT32_C( 6044), INT32_C( 8407), INT32_C( 1220), -INT32_C( 723)}, + INT8_C( 3), + { INT32_C( 7876800), -INT32_C( 1206831), -INT32_C( 2930198), INT32_C( 3535057) } }, + { { INT32_C( 9047969), -INT32_C( 3280918), INT32_C( 5675927), INT32_C( 3443148) }, + { INT32_C( 9648), -INT32_C( 860), INT32_C( 4176), -INT32_C( 3865) }, + { INT32_C( 4658), INT32_C( 8440), INT32_C( 848), INT32_C( 8075)}, + INT8_C( 1), + { -INT32_C( 72381151), INT32_C( 3977482), -INT32_C( 29569513), INT32_C( 36063748) 
} }, + }; + + simde_int32x4_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_s32(test_vec[i].a); + b = simde_vld1q_s32(test_vec[i].b); + v = simde_vld1q_s32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vmlsq_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vmlsq_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vmlsq_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint16_t v[8]; + int8_t lane; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 18145), UINT16_C( 14017), UINT16_C( 15721), UINT16_C( 7915), + UINT16_C( 12416), UINT16_C( 9449), UINT16_C( 2938), UINT16_C( 7196) }, + { UINT16_C( 49), UINT16_C( 187), UINT16_C( 5), UINT16_C( 77), + UINT16_C( 55), UINT16_C( 70), UINT16_C( 135), UINT16_C( 137) }, + { UINT16_C( 191), UINT16_C( 124), UINT16_C( 162), UINT16_C( 6), + UINT16_C( 62), UINT16_C( 99), UINT16_C( 119), UINT16_C( 172)}, + INT8_C( 6), + { UINT16_C( 12314), UINT16_C( 57300), UINT16_C( 15126), UINT16_C( 64288), + UINT16_C( 5871), UINT16_C( 1119), UINT16_C( 52409), UINT16_C( 56429) } }, + { { UINT16_C( 10417), UINT16_C( 17841), UINT16_C( 16776), UINT16_C( 1442), + UINT16_C( 8682), UINT16_C( 9449), UINT16_C( 10075), UINT16_C( 10824) }, + { UINT16_C( 3), UINT16_C( 115), UINT16_C( 118), UINT16_C( 193), + UINT16_C( 68), UINT16_C( 55), UINT16_C( 63), UINT16_C( 49) }, + { UINT16_C( 41), UINT16_C( 174), UINT16_C( 106), UINT16_C( 117), + UINT16_C( 72), UINT16_C( 25), UINT16_C( 93), UINT16_C( 27)}, + INT8_C( 0), + { UINT16_C( 10294), UINT16_C( 13126), UINT16_C( 11938), UINT16_C( 59065), + UINT16_C( 5894), UINT16_C( 7194), UINT16_C( 7492), UINT16_C( 8815) } 
}, + { { UINT16_C( 3889), UINT16_C( 13409), UINT16_C( 19336), UINT16_C( 15123), + UINT16_C( 118), UINT16_C( 6711), UINT16_C( 17579), UINT16_C( 5828) }, + { UINT16_C( 104), UINT16_C( 18), UINT16_C( 116), UINT16_C( 147), + UINT16_C( 173), UINT16_C( 62), UINT16_C( 43), UINT16_C( 77) }, + { UINT16_C( 77), UINT16_C( 124), UINT16_C( 174), UINT16_C( 73), + UINT16_C( 65), UINT16_C( 99), UINT16_C( 197), UINT16_C( 17)}, + INT8_C( 3), + { UINT16_C( 61833), UINT16_C( 12095), UINT16_C( 10868), UINT16_C( 4392), + UINT16_C( 53025), UINT16_C( 2185), UINT16_C( 14440), UINT16_C( 207) } }, + { { UINT16_C( 10275), UINT16_C( 705), UINT16_C( 9741), UINT16_C( 869), + UINT16_C( 10520), UINT16_C( 8115), UINT16_C( 17123), UINT16_C( 1031) }, + { UINT16_C( 20), UINT16_C( 196), UINT16_C( 108), UINT16_C( 24), + UINT16_C( 122), UINT16_C( 167), UINT16_C( 156), UINT16_C( 187) }, + { UINT16_C( 92), UINT16_C( 74), UINT16_C( 51), UINT16_C( 164), + UINT16_C( 173), UINT16_C( 154), UINT16_C( 51), UINT16_C( 187)}, + INT8_C( 7), + { UINT16_C( 6535), UINT16_C( 29589), UINT16_C( 55081), UINT16_C( 61917), + UINT16_C( 53242), UINT16_C( 42422), UINT16_C( 53487), UINT16_C( 31598) } }, + { { UINT16_C( 16022), UINT16_C( 17903), UINT16_C( 206), UINT16_C( 8835), + UINT16_C( 8437), UINT16_C( 185), UINT16_C( 10376), UINT16_C( 2785) }, + { UINT16_C( 97), UINT16_C( 103), UINT16_C( 44), UINT16_C( 130), + UINT16_C( 115), UINT16_C( 125), UINT16_C( 119), UINT16_C( 92) }, + { UINT16_C( 4), UINT16_C( 92), UINT16_C( 15), UINT16_C( 24), + UINT16_C( 0), UINT16_C( 174), UINT16_C( 179), UINT16_C( 77)}, + INT8_C( 1), + { UINT16_C( 7098), UINT16_C( 8427), UINT16_C( 61694), UINT16_C( 62411), + UINT16_C( 63393), UINT16_C( 54221), UINT16_C( 64964), UINT16_C( 59857) } }, + { { UINT16_C( 16280), UINT16_C( 2025), UINT16_C( 3456), UINT16_C( 6751), + UINT16_C( 7293), UINT16_C( 15784), UINT16_C( 14562), UINT16_C( 568) }, + { UINT16_C( 35), UINT16_C( 55), UINT16_C( 168), UINT16_C( 79), + UINT16_C( 106), UINT16_C( 19), UINT16_C( 128), 
UINT16_C( 125) }, + { UINT16_C( 102), UINT16_C( 190), UINT16_C( 11), UINT16_C( 126), + UINT16_C( 59), UINT16_C( 79), UINT16_C( 189), UINT16_C( 80)}, + INT8_C( 7), + { UINT16_C( 13480), UINT16_C( 63161), UINT16_C( 55552), UINT16_C( 431), + UINT16_C( 64349), UINT16_C( 14264), UINT16_C( 4322), UINT16_C( 56104) } }, + { { UINT16_C( 7069), UINT16_C( 12929), UINT16_C( 18496), UINT16_C( 3618), + UINT16_C( 9857), UINT16_C( 8094), UINT16_C( 19162), UINT16_C( 7233) }, + { UINT16_C( 83), UINT16_C( 58), UINT16_C( 68), UINT16_C( 92), + UINT16_C( 180), UINT16_C( 11), UINT16_C( 52), UINT16_C( 148) }, + { UINT16_C( 70), UINT16_C( 164), UINT16_C( 85), UINT16_C( 95), + UINT16_C( 150), UINT16_C( 86), UINT16_C( 172), UINT16_C( 12)}, + INT8_C( 4), + { UINT16_C( 60155), UINT16_C( 4229), UINT16_C( 8296), UINT16_C( 55354), + UINT16_C( 48393), UINT16_C( 6444), UINT16_C( 11362), UINT16_C( 50569) } }, + { { UINT16_C( 4142), UINT16_C( 11558), UINT16_C( 651), UINT16_C( 17526), + UINT16_C( 9861), UINT16_C( 2214), UINT16_C( 11903), UINT16_C( 15590) }, + { UINT16_C( 154), UINT16_C( 179), UINT16_C( 62), UINT16_C( 90), + UINT16_C( 75), UINT16_C( 76), UINT16_C( 161), UINT16_C( 113) }, + { UINT16_C( 176), UINT16_C( 164), UINT16_C( 21), UINT16_C( 52), + UINT16_C( 15), UINT16_C( 165), UINT16_C( 85), UINT16_C( 50)}, + INT8_C( 7), + { UINT16_C( 61978), UINT16_C( 2608), UINT16_C( 63087), UINT16_C( 13026), + UINT16_C( 6111), UINT16_C( 63950), UINT16_C( 3853), UINT16_C( 9940) } }, + }; + + simde_uint16x8_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_u16(test_vec[i].a); + b = simde_vld1q_u16(test_vec[i].b); + v = simde_vld1q_u16(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_laneq_u16(a, b, v, 0); break; + case 1: r = simde_vmlsq_laneq_u16(a, b, v, 1); break; + case 2: r = simde_vmlsq_laneq_u16(a, b, v, 2); break; + case 3: r = simde_vmlsq_laneq_u16(a, b, v, 3); break; + case 4: r = simde_vmlsq_laneq_u16(a, b, v, 4); 
break; + case 5: r = simde_vmlsq_laneq_u16(a, b, v, 5); break; + case 6: r = simde_vmlsq_laneq_u16(a, b, v, 6); break; + case 7: r = simde_vmlsq_laneq_u16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u16(0); break; + } + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsq_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint32_t v[4]; + int8_t lane; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 1861053), UINT32_C( 12637301), UINT32_C( 15274610), UINT32_C( 9835651) }, + { UINT32_C( 11495), UINT32_C( 8707), UINT32_C( 5467), UINT32_C( 6266) }, + { UINT32_C( 4717), UINT32_C( 14182), UINT32_C( 15593), UINT32_C( 6816)}, + INT8_C( 2), + { UINT32_C(4117586814), UINT32_C(4171836346), UINT32_C(4224994975), UINT32_C(4207097209) } }, + { { UINT32_C( 9489067), UINT32_C( 8154869), UINT32_C( 3448644), UINT32_C( 17329986) }, + { UINT32_C( 11917), UINT32_C( 9564), UINT32_C( 8828), UINT32_C( 14785) }, + { UINT32_C( 15402), UINT32_C( 3072), UINT32_C( 76), UINT32_C( 1440)}, + INT8_C( 0), + { UINT32_C(4120910729), UINT32_C(4155817437), UINT32_C(4162447084), UINT32_C(4084578712) } }, + { { UINT32_C( 19581104), UINT32_C( 17145354), UINT32_C( 4944592), UINT32_C( 2397841) }, + { UINT32_C( 16374), UINT32_C( 18528), UINT32_C( 3954), UINT32_C( 16844) }, + { UINT32_C( 6018), UINT32_C( 6617), UINT32_C( 11914), UINT32_C( 15796)}, + INT8_C( 0), + { UINT32_C(4216009668), UINT32_C(4200611146), UINT32_C(4276116716), UINT32_C(4195997945) } }, + { { UINT32_C( 13966437), UINT32_C( 8420533), UINT32_C( 3122956), UINT32_C( 18154704) }, + { UINT32_C( 17150), UINT32_C( 7661), UINT32_C( 7460), UINT32_C( 577) }, + { UINT32_C( 13649), UINT32_C( 9057), UINT32_C( 3959), UINT32_C( 12619)}, + INT8_C( 1), + { UINT32_C(4153606183), UINT32_C(4234002152), UINT32_C(4230525032), UINT32_C( 12928815) } }, + { { UINT32_C( 3665546), UINT32_C( 1202649), UINT32_C( 
18582276), UINT32_C( 16796651) }, + { UINT32_C( 14139), UINT32_C( 7351), UINT32_C( 1028), UINT32_C( 13472) }, + { UINT32_C( 16984), UINT32_C( 8465), UINT32_C( 10404), UINT32_C( 9769)}, + INT8_C( 0), + { UINT32_C(4058496066), UINT32_C(4171320561), UINT32_C( 1122724), UINT32_C(4082955499) } }, + { { UINT32_C( 8215597), UINT32_C( 17956728), UINT32_C( 5061094), UINT32_C( 14392408) }, + { UINT32_C( 2670), UINT32_C( 7127), UINT32_C( 5942), UINT32_C( 18433) }, + { UINT32_C( 5407), UINT32_C( 18714), UINT32_C( 11120), UINT32_C( 3071)}, + INT8_C( 0), + { UINT32_C(4288746203), UINT32_C(4274388335), UINT32_C(4267899996), UINT32_C(4209692473) } }, + { { UINT32_C( 11104270), UINT32_C( 2711059), UINT32_C( 11905031), UINT32_C( 2354726) }, + { UINT32_C( 62), UINT32_C( 19922), UINT32_C( 11313), UINT32_C( 12703) }, + { UINT32_C( 2373), UINT32_C( 16679), UINT32_C( 13946), UINT32_C( 10180)}, + INT8_C( 1), + { UINT32_C( 10070172), UINT32_C(3965399317), UINT32_C(4118182800), UINT32_C(4085448685) } }, + { { UINT32_C( 8015728), UINT32_C( 6058493), UINT32_C( 18794942), UINT32_C( 15641101) }, + { UINT32_C( 6854), UINT32_C( 15329), UINT32_C( 18526), UINT32_C( 3673) }, + { UINT32_C( 4060), UINT32_C( 5976), UINT32_C( 1701), UINT32_C( 1084)}, + INT8_C( 3), + { UINT32_C( 585992), UINT32_C(4284409153), UINT32_C(4293680054), UINT32_C( 11659569) } }, + }; + + simde_uint32x4_t r, a, b, v; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + a = simde_vld1q_u32(test_vec[i].a); + b = simde_vld1q_u32(test_vec[i].b); + v = simde_vld1q_u32(test_vec[i].v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlsq_laneq_u32(a, b, v, 0); break; + case 1: r = simde_vmlsq_laneq_u32(a, b, v, 1); break; + case 2: r = simde_vmlsq_laneq_u32(a, b, v, 2); break; + case 3: r = simde_vmlsq_laneq_u32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u32(0); break; + } + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + 
+SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_lane_u32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmls_laneq_u32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_lane_u32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_laneq_u32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/mlsl_high_lane.c b/test/arm/neon/mlsl_high_lane.c new file mode 100644 index 000000000..17235cb2e --- /dev/null +++ b/test/arm/neon/mlsl_high_lane.c @@ -0,0 +1,579 @@ +#define SIMDE_TEST_ARM_NEON_INSN mlsl_high_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/mlsl_high_lane.h" + +static int +test_simde_vmlsl_high_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 67712035), -INT32_C( 67864461), INT32_C( 11349754), INT32_C( 52169456) }, + { INT16_C( 6212), -INT16_C( 8322), INT16_C( 7490), -INT16_C( 4147), + INT16_C( 7816), -INT16_C( 4356), INT16_C( 1955), -INT16_C( 6760) }, + { -INT16_C( 3314), -INT16_C( 1248), INT16_C( 6796), INT16_C( 1372), + -INT16_C( 6756), -INT16_C( 5059), INT16_C( 108), INT16_C( 4649)}, + INT8_C( 4), + { INT32_C( 120516931), -INT32_C( 97293597), INT32_C( 24557734), INT32_C( 6498896) } }, + { { INT32_C( 75597580), -INT32_C( 
60874521), -INT32_C( 67759165), INT32_C( 80327885) }, + { -INT16_C( 8993), -INT16_C( 682), INT16_C( 8817), -INT16_C( 6314), + -INT16_C( 6394), INT16_C( 3962), -INT16_C( 6496), -INT16_C( 2545) }, + { -INT16_C( 7603), INT16_C( 209), -INT16_C( 7567), -INT16_C( 7069), + -INT16_C( 1252), -INT16_C( 8141), -INT16_C( 4732), -INT16_C( 7645)}, + INT8_C( 7), + { INT32_C( 26715450), -INT32_C( 30585031), -INT32_C( 117421085), INT32_C( 60871360) } }, + { { INT32_C( 26187969), -INT32_C( 34665412), INT32_C( 37889481), INT32_C( 14150274) }, + { INT16_C( 3068), -INT16_C( 3713), -INT16_C( 2487), -INT16_C( 9210), + INT16_C( 1167), -INT16_C( 7653), -INT16_C( 2218), INT16_C( 4419) }, + { -INT16_C( 3731), INT16_C( 6397), INT16_C( 5705), -INT16_C( 8894), + -INT16_C( 439), -INT16_C( 8184), -INT16_C( 3147), -INT16_C( 9441)}, + INT8_C( 7), + { INT32_C( 37205616), -INT32_C( 106917385), INT32_C( 16949343), INT32_C( 55870053) } }, + { { INT32_C( 59507820), -INT32_C( 33087451), INT32_C( 24156037), INT32_C( 39805590) }, + { INT16_C( 9106), -INT16_C( 7586), -INT16_C( 3923), INT16_C( 8412), + -INT16_C( 5902), INT16_C( 9129), INT16_C( 5614), -INT16_C( 759) }, + { -INT16_C( 6242), -INT16_C( 9156), INT16_C( 6483), INT16_C( 5463), + -INT16_C( 5715), -INT16_C( 8533), -INT16_C( 5979), INT16_C( 8666)}, + INT8_C( 7), + { INT32_C( 110654552), -INT32_C( 112199365), -INT32_C( 24494887), INT32_C( 46383084) } }, + { { INT32_C( 70178161), -INT32_C( 83399214), INT32_C( 38246661), INT32_C( 19106266) }, + { INT16_C( 2663), -INT16_C( 159), INT16_C( 9408), INT16_C( 5538), + INT16_C( 4546), INT16_C( 1653), -INT16_C( 4482), -INT16_C( 2814) }, + { -INT16_C( 7600), INT16_C( 4001), INT16_C( 1914), INT16_C( 4530), + -INT16_C( 7475), INT16_C( 9916), -INT16_C( 6657), INT16_C( 389)}, + INT8_C( 6), + { INT32_C( 100440883), -INT32_C( 72395193), INT32_C( 8409987), INT32_C( 373468) } }, + { { -INT32_C( 95033943), -INT32_C( 14366530), INT32_C( 84221826), -INT32_C( 27784504) }, + { -INT16_C( 227), -INT16_C( 9636), INT16_C( 6066), 
-INT16_C( 588), + -INT16_C( 8009), -INT16_C( 2439), INT16_C( 9020), -INT16_C( 6696) }, + { INT16_C( 6567), INT16_C( 5099), INT16_C( 1544), -INT16_C( 2666), + -INT16_C( 9246), INT16_C( 6472), -INT16_C( 8268), INT16_C( 3662)}, + INT8_C( 2), + { -INT32_C( 82668047), -INT32_C( 10600714), INT32_C( 70294946), -INT32_C( 17445880) } }, + { { INT32_C( 73170314), -INT32_C( 38806359), -INT32_C( 6353119), INT32_C( 11876393) }, + { -INT16_C( 2801), INT16_C( 5967), -INT16_C( 1216), -INT16_C( 2920), + -INT16_C( 8145), INT16_C( 1392), -INT16_C( 5137), INT16_C( 8336) }, + { -INT16_C( 6380), -INT16_C( 670), INT16_C( 146), INT16_C( 5849), + -INT16_C( 2698), INT16_C( 6342), INT16_C( 4486), INT16_C( 8046)}, + INT8_C( 2), + { INT32_C( 74359484), -INT32_C( 39009591), -INT32_C( 5603117), INT32_C( 10659337) } }, + { { INT32_C( 74620035), INT32_C( 19263285), INT32_C( 44551400), -INT32_C( 57917580) }, + { -INT16_C( 5033), -INT16_C( 7722), INT16_C( 3965), -INT16_C( 4620), + -INT16_C( 9970), INT16_C( 9675), -INT16_C( 5475), INT16_C( 6604) }, + { -INT16_C( 8764), INT16_C( 2998), INT16_C( 2265), INT16_C( 3368), + -INT16_C( 4897), INT16_C( 3529), INT16_C( 2167), -INT16_C( 2180)}, + INT8_C( 5), + { INT32_C( 109804165), -INT32_C( 14879790), INT32_C( 63872675), -INT32_C( 81223096) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_8_(simde_vmlsl_high_laneq_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 16038286), -INT32_C( 28905122), -INT32_C( 
41483605), INT32_C( 30927504) }, + { -INT16_C( 6766), INT16_C( 3589), INT16_C( 4613), INT16_C( 8569), + -INT16_C( 4501), -INT16_C( 781), INT16_C( 55), INT16_C( 7815) }, + { -INT16_C( 1002), INT16_C( 8827), -INT16_C( 2491), -INT16_C( 2507)}, + INT8_C( 1), + { INT32_C( 23692041), -INT32_C( 22011235), -INT32_C( 41969090), -INT32_C( 38055501) } }, + { { -INT32_C( 1465584), -INT32_C( 93487862), -INT32_C( 45926674), -INT32_C( 67678742) }, + { INT16_C( 2292), INT16_C( 6442), -INT16_C( 4172), INT16_C( 7676), + -INT16_C( 4398), INT16_C( 9179), INT16_C( 8183), -INT16_C( 4844) }, + { INT16_C( 9033), -INT16_C( 6631), INT16_C( 1491), INT16_C( 1758)}, + INT8_C( 0), + { INT32_C( 38261550), -INT32_C( 176401769), -INT32_C( 119843713), -INT32_C( 23922890) } }, + { { INT32_C( 63865457), INT32_C( 78987025), -INT32_C( 62827444), -INT32_C( 20940752) }, + { -INT16_C( 6639), -INT16_C( 7243), INT16_C( 4755), INT16_C( 114), + INT16_C( 9724), -INT16_C( 2345), INT16_C( 1685), -INT16_C( 3253) }, + { -INT16_C( 9839), INT16_C( 8885), INT16_C( 9180), INT16_C( 7911)}, + INT8_C( 3), + { -INT32_C( 13061107), INT32_C( 97538320), -INT32_C( 76157479), INT32_C( 4793731) } }, + { { INT32_C( 93331675), -INT32_C( 42492077), INT32_C( 87569415), -INT32_C( 40528661) }, + { INT16_C( 6333), INT16_C( 6967), INT16_C( 1558), INT16_C( 1671), + INT16_C( 2212), INT16_C( 1053), -INT16_C( 9343), -INT16_C( 4322) }, + { -INT16_C( 360), INT16_C( 5517), -INT16_C( 850), INT16_C( 589)}, + INT8_C( 0), + { INT32_C( 94127995), -INT32_C( 42112997), INT32_C( 84205935), -INT32_C( 42084581) } }, + { { INT32_C( 67912905), INT32_C( 80342618), INT32_C( 19746680), -INT32_C( 45657976) }, + { INT16_C( 1855), -INT16_C( 312), -INT16_C( 795), INT16_C( 4535), + -INT16_C( 875), INT16_C( 1465), -INT16_C( 8325), -INT16_C( 511) }, + { -INT16_C( 3560), -INT16_C( 7596), -INT16_C( 6733), -INT16_C( 7743)}, + INT8_C( 3), + { INT32_C( 61137780), INT32_C( 91686113), -INT32_C( 44713795), -INT32_C( 49614649) } }, + { { -INT32_C( 78117098), INT32_C( 
22969964), INT32_C( 14298623), -INT32_C( 65823014) }, + { -INT16_C( 4909), INT16_C( 793), INT16_C( 8395), INT16_C( 1872), + -INT16_C( 3709), -INT16_C( 6161), -INT16_C( 6335), -INT16_C( 3541) }, + { -INT16_C( 5046), INT16_C( 1110), -INT16_C( 3197), -INT16_C( 2235)}, + INT8_C( 2), + { -INT32_C( 89974771), INT32_C( 3273247), -INT32_C( 5954372), -INT32_C( 77143591) } }, + { { -INT32_C( 10285084), -INT32_C( 6337390), INT32_C( 33112012), INT32_C( 22633887) }, + { INT16_C( 7823), INT16_C( 5178), -INT16_C( 1524), INT16_C( 5223), + INT16_C( 3728), INT16_C( 1041), -INT16_C( 4068), INT16_C( 9734) }, + { -INT16_C( 2560), -INT16_C( 2460), INT16_C( 1203), INT16_C( 7274)}, + INT8_C( 1), + { -INT32_C( 1114204), -INT32_C( 3776530), INT32_C( 23104732), INT32_C( 46579527) } }, + { { -INT32_C( 7539938), INT32_C( 29005221), INT32_C( 87743249), -INT32_C( 60072285) }, + { INT16_C( 59), -INT16_C( 3792), -INT16_C( 4149), INT16_C( 2029), + -INT16_C( 9974), -INT16_C( 1840), INT16_C( 9068), INT16_C( 7620) }, + { -INT16_C( 1100), -INT16_C( 4625), INT16_C( 4297), -INT16_C( 5953)}, + INT8_C( 0), + { -INT32_C( 18511338), INT32_C( 26981221), INT32_C( 97718049), -INT32_C( 51690285) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_4_(simde_vmlsl_high_lane_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 455879767), -INT64_C( 866486967) }, + { INT32_C( 743175), -INT32_C( 968111), INT32_C( 116900), INT32_C( 587450) }, + { INT32_C( 171585), -INT32_C( 
745875), INT32_C( 868410), INT32_C( 148291)}, + INT8_C( 0), + { -INT64_C( 20514166267), -INT64_C( 101664095217) } }, + { { -INT64_C( 826317710), INT64_C( 500734654) }, + { INT32_C( 171853), -INT32_C( 119370), INT32_C( 240470), INT32_C( 141429) }, + { -INT32_C( 835082), -INT32_C( 66691), -INT32_C( 592739), -INT32_C( 599194)}, + INT8_C( 2), + { INT64_C( 141709629620), INT64_C( 84331218685) } }, + { { -INT64_C( 112887201), INT64_C( 931258661) }, + { INT32_C( 421732), -INT32_C( 791541), INT32_C( 970308), INT32_C( 537492) }, + { -INT32_C( 975259), INT32_C( 966845), -INT32_C( 107533), INT32_C( 451460)}, + INT8_C( 3), + { -INT64_C( 438168136881), -INT64_C( 241724879659) } }, + { { -INT64_C( 695291438), -INT64_C( 484847055) }, + { -INT32_C( 141398), -INT32_C( 191063), -INT32_C( 991962), INT32_C( 8773) }, + { INT32_C( 143999), INT32_C( 928844), -INT32_C( 572636), -INT32_C( 865390)}, + INT8_C( 2), + { -INT64_C( 568728443270), INT64_C( 4538888573) } }, + { { INT64_C( 824767030), INT64_C( 935157684) }, + { INT32_C( 462167), INT32_C( 230846), INT32_C( 485541), INT32_C( 495559) }, + { -INT32_C( 786156), -INT32_C( 603217), INT32_C( 818113), INT32_C( 187995)}, + INT8_C( 2), + { -INT64_C( 396402637103), -INT64_C( 404488102483) } }, + { { -INT64_C( 68123208), -INT64_C( 140265965) }, + { -INT32_C( 881879), -INT32_C( 409087), INT32_C( 907326), INT32_C( 894362) }, + { -INT32_C( 67813), -INT32_C( 787612), -INT32_C( 785492), -INT32_C( 828160)}, + INT8_C( 0), + { INT64_C( 61460374830), INT64_C( 60509104341) } }, + { { -INT64_C( 194996680), INT64_C( 107121797) }, + { INT32_C( 509167), INT32_C( 962020), INT32_C( 931436), -INT32_C( 84232) }, + { INT32_C( 165195), INT32_C( 187721), -INT32_C( 976542), INT32_C( 703973)}, + INT8_C( 2), + { INT64_C( 909391377632), -INT64_C( 82148963947) } }, + { { -INT64_C( 643129462), INT64_C( 766919626) }, + { -INT32_C( 487078), -INT32_C( 903804), INT32_C( 307937), -INT32_C( 572415) }, + { INT32_C( 433558), -INT32_C( 631566), -INT32_C( 446919), INT32_C( 
22497)}, + INT8_C( 3), + { -INT64_C( 7570788151), INT64_C( 13644539881) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_4_(simde_vmlsl_high_laneq_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 779064126), INT64_C( 926199021) }, + { -INT32_C( 7543), INT32_C( 658987), -INT32_C( 306512), -INT32_C( 652839) }, + { INT32_C( 851028), -INT32_C( 22360)}, + INT8_C( 1), + { -INT64_C( 7632672446), -INT64_C( 13671281019) } }, + { { -INT64_C( 807375181), INT64_C( 424049830) }, + { -INT32_C( 435009), INT32_C( 136227), -INT32_C( 980835), INT32_C( 681295) }, + { -INT32_C( 170976), -INT32_C( 898669)}, + INT8_C( 0), + { -INT64_C( 168506620141), INT64_C( 116909143750) } }, + { { INT64_C( 949966717), -INT64_C( 757291296) }, + { INT32_C( 221316), -INT32_C( 984565), INT32_C( 799048), -INT32_C( 415305) }, + { INT32_C( 138966), INT32_C( 30515)}, + INT8_C( 0), + { -INT64_C( 110090537651), INT64_C( 56955983334) } }, + { { -INT64_C( 761919040), -INT64_C( 521168046) }, + { INT32_C( 975792), INT32_C( 503464), INT32_C( 260846), -INT32_C( 135815) }, + { -INT32_C( 29474), -INT32_C( 357205)}, + INT8_C( 0), + { INT64_C( 6926255964), -INT64_C( 4524179356) } }, + { { INT64_C( 39073934), -INT64_C( 667409585) }, + { INT32_C( 935425), -INT32_C( 770364), -INT32_C( 376646), INT32_C( 52533) }, + { -INT32_C( 805091), -INT32_C( 419069)}, + INT8_C( 0), + { -INT64_C( 303195230852), INT64_C( 41626435918) } }, + { { -INT64_C( 320427737), INT64_C( 98736754) 
}, + { INT32_C( 725730), -INT32_C( 741638), INT32_C( 577936), INT32_C( 581300) }, + { -INT32_C( 927094), -INT32_C( 751422)}, + INT8_C( 0), + { INT64_C( 535480570247), INT64_C( 539018478954) } }, + { { INT64_C( 18901554), -INT64_C( 378285155) }, + { -INT32_C( 874243), -INT32_C( 873519), -INT32_C( 216250), INT32_C( 875544) }, + { -INT32_C( 571670), INT32_C( 755647)}, + INT8_C( 1), + { INT64_C( 163427565304), -INT64_C( 661980482123) } }, + { { INT64_C( 14496407), INT64_C( 779073912) }, + { INT32_C( 761041), -INT32_C( 995277), -INT32_C( 164203), INT32_C( 782472) }, + { -INT32_C( 63927), -INT32_C( 720738)}, + INT8_C( 0), + { -INT64_C( 10482508774), INT64_C( 50800161456) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_2_(simde_vmlsl_high_lane_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t b[8]; + uint16_t v[8]; + int8_t lane; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 187720474), UINT32_C( 114738506), UINT32_C( 154477949), UINT32_C( 37108131) }, + { UINT16_C( 11178), UINT16_C( 14095), UINT16_C( 19757), UINT16_C( 17210), + UINT16_C( 11826), UINT16_C( 15068), UINT16_C( 8896), UINT16_C( 11650) }, + { UINT16_C( 3516), UINT16_C( 12389), UINT16_C( 18464), UINT16_C( 13345), + UINT16_C( 13597), UINT16_C( 12234), UINT16_C( 2634), UINT16_C( 17835)}, + INT8_C( 4), + { UINT32_C( 26922352), UINT32_C( 4204826206), UINT32_C( 33519037), UINT32_C( 4173670377) } }, + { { UINT32_C( 86470971), UINT32_C( 31690454), UINT32_C( 18826166), UINT32_C( 46891713) }, + { UINT16_C( 10051), UINT16_C( 19970), UINT16_C( 
19809), UINT16_C( 5870), + UINT16_C( 8884), UINT16_C( 9361), UINT16_C( 11925), UINT16_C( 2241) }, + { UINT16_C( 3459), UINT16_C( 9236), UINT16_C( 14016), UINT16_C( 19498), + UINT16_C( 4400), UINT16_C( 982), UINT16_C( 13826), UINT16_C( 19737)}, + INT8_C( 2), + { UINT32_C( 4256920123), UINT32_C( 4195453974), UINT32_C( 4146652662), UINT32_C( 15481857) } }, + { { UINT32_C( 106352497), UINT32_C( 72349451), UINT32_C( 129526511), UINT32_C( 63122726) }, + { UINT16_C( 9656), UINT16_C( 18065), UINT16_C( 6404), UINT16_C( 10849), + UINT16_C( 4802), UINT16_C( 9367), UINT16_C( 3503), UINT16_C( 7190) }, + { UINT16_C( 3112), UINT16_C( 9509), UINT16_C( 6718), UINT16_C( 10772), + UINT16_C( 6968), UINT16_C( 11549), UINT16_C( 18296), UINT16_C( 5254)}, + INT8_C( 1), + { UINT32_C( 60690279), UINT32_C( 4278245944), UINT32_C( 96216484), UINT32_C( 4289720312) } }, + { { UINT32_C( 75743798), UINT32_C( 158787442), UINT32_C( 96830003), UINT32_C( 140288831) }, + { UINT16_C( 10998), UINT16_C( 1303), UINT16_C( 2067), UINT16_C( 13241), + UINT16_C( 12475), UINT16_C( 18530), UINT16_C( 8560), UINT16_C( 19234) }, + { UINT16_C( 19672), UINT16_C( 14469), UINT16_C( 16251), UINT16_C( 1202), + UINT16_C( 3808), UINT16_C( 3251), UINT16_C( 14644), UINT16_C( 5374)}, + INT8_C( 2), + { UINT32_C( 4167979869), UINT32_C( 4152623708), UINT32_C( 4252688739), UINT32_C( 4122684393) } }, + { { UINT32_C( 18116890), UINT32_C( 139746422), UINT32_C( 18991941), UINT32_C( 18587699) }, + { UINT16_C( 7461), UINT16_C( 15853), UINT16_C( 11733), UINT16_C( 3207), + UINT16_C( 752), UINT16_C( 19168), UINT16_C( 6498), UINT16_C( 19201) }, + { UINT16_C( 11749), UINT16_C( 15661), UINT16_C( 8124), UINT16_C( 14225), + UINT16_C( 11175), UINT16_C( 4834), UINT16_C( 1326), UINT16_C( 9295)}, + INT8_C( 3), + { UINT32_C( 7419690), UINT32_C( 4162048918), UINT32_C( 4221525187), UINT32_C( 4040420770) } }, + { { UINT32_C( 177779414), UINT32_C( 28653639), UINT32_C( 191452144), UINT32_C( 122212991) }, + { UINT16_C( 16872), UINT16_C( 2341), UINT16_C( 
19547), UINT16_C( 3013), + UINT16_C( 10180), UINT16_C( 18540), UINT16_C( 3216), UINT16_C( 18595) }, + { UINT16_C( 19844), UINT16_C( 19936), UINT16_C( 6809), UINT16_C( 6435), + UINT16_C( 16250), UINT16_C( 18972), UINT16_C( 1879), UINT16_C( 700)}, + INT8_C( 6), + { UINT32_C( 158651194), UINT32_C( 4288784275), UINT32_C( 185409280), UINT32_C( 87272986) } }, + { { UINT32_C( 191118184), UINT32_C( 133087100), UINT32_C( 6939589), UINT32_C( 179040854) }, + { UINT16_C( 17789), UINT16_C( 983), UINT16_C( 5092), UINT16_C( 13224), + UINT16_C( 9772), UINT16_C( 12128), UINT16_C( 11539), UINT16_C( 6813) }, + { UINT16_C( 16436), UINT16_C( 4748), UINT16_C( 10981), UINT16_C( 6595), + UINT16_C( 15229), UINT16_C( 8269), UINT16_C( 14302), UINT16_C( 16045)}, + INT8_C( 4), + { UINT32_C( 42300396), UINT32_C( 4243357084), UINT32_C( 4126179454), UINT32_C( 75285677) } }, + { { UINT32_C( 161867121), UINT32_C( 166534840), UINT32_C( 152062199), UINT32_C( 100090646) }, + { UINT16_C( 18028), UINT16_C( 14483), UINT16_C( 12126), UINT16_C( 5959), + UINT16_C( 14581), UINT16_C( 18561), UINT16_C( 925), UINT16_C( 19196) }, + { UINT16_C( 2543), UINT16_C( 12197), UINT16_C( 19676), UINT16_C( 11063), + UINT16_C( 11990), UINT16_C( 14163), UINT16_C( 8178), UINT16_C( 1054)}, + INT8_C( 6), + { UINT32_C( 42623703), UINT32_C( 14742982), UINT32_C( 144497549), UINT32_C( 4238073054) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint16x8_t v = simde_vld1q_u16(test_vec[i].v); + simde_uint32x4_t r; + SIMDE_CONSTIFY_8_(simde_vmlsl_high_laneq_u16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t b[8]; + uint16_t v[4]; + int8_t lane; + 
uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 128129524), UINT32_C( 3786337), UINT32_C( 89219593), UINT32_C( 156381984) }, + { UINT16_C( 16047), UINT16_C( 4999), UINT16_C( 18363), UINT16_C( 12281), + UINT16_C( 6887), UINT16_C( 15950), UINT16_C( 5386), UINT16_C( 8860) }, + { UINT16_C( 1477), UINT16_C( 9737), UINT16_C( 8887), UINT16_C( 18453)}, + INT8_C( 1), + { UINT32_C( 61070805), UINT32_C( 4143448483), UINT32_C( 36776111), UINT32_C( 70112164) } }, + { { UINT32_C( 154613048), UINT32_C( 62703310), UINT32_C( 94381071), UINT32_C( 21069636) }, + { UINT16_C( 1583), UINT16_C( 19523), UINT16_C( 1869), UINT16_C( 10490), + UINT16_C( 13433), UINT16_C( 15062), UINT16_C( 1731), UINT16_C( 9759) }, + { UINT16_C( 19043), UINT16_C( 13447), UINT16_C( 13345), UINT16_C( 8332)}, + INT8_C( 1), + { UINT32_C( 4268946793), UINT32_C( 4155131892), UINT32_C( 71104314), UINT32_C( 4184807659) } }, + { { UINT32_C( 18645400), UINT32_C( 52106771), UINT32_C( 91507360), UINT32_C( 8180764) }, + { UINT16_C( 6572), UINT16_C( 16642), UINT16_C( 11506), UINT16_C( 7010), + UINT16_C( 10364), UINT16_C( 19132), UINT16_C( 1311), UINT16_C( 9727) }, + { UINT16_C( 5894), UINT16_C( 3908), UINT16_C( 18924), UINT16_C( 8301)}, + INT8_C( 3), + { UINT32_C( 4227581132), UINT32_C( 4188259335), UINT32_C( 80624749), UINT32_C( 4222404233) } }, + { { UINT32_C( 81479648), UINT32_C( 76992396), UINT32_C( 177872811), UINT32_C( 169811774) }, + { UINT16_C( 952), UINT16_C( 9039), UINT16_C( 13623), UINT16_C( 8401), + UINT16_C( 17428), UINT16_C( 13099), UINT16_C( 3553), UINT16_C( 2542) }, + { UINT16_C( 15201), UINT16_C( 7062), UINT16_C( 2290), UINT16_C( 747)}, + INT8_C( 2), + { UINT32_C( 41569528), UINT32_C( 46995686), UINT32_C( 169736441), UINT32_C( 163990594) } }, + { { UINT32_C( 1560560), UINT32_C( 79174338), UINT32_C( 142942120), UINT32_C( 22318614) }, + { UINT16_C( 12362), UINT16_C( 13981), UINT16_C( 11678), UINT16_C( 5815), + UINT16_C( 17685), UINT16_C( 9255), UINT16_C( 2518), UINT16_C( 4104) }, + { UINT16_C( 150), 
UINT16_C( 12096), UINT16_C( 10674), UINT16_C( 9603)}, + INT8_C( 1), + { UINT32_C( 4082610096), UINT32_C( 4262193154), UINT32_C( 112484392), UINT32_C( 4267643926) } }, + { { UINT32_C( 144333765), UINT32_C( 154887730), UINT32_C( 39392927), UINT32_C( 180100438) }, + { UINT16_C( 5728), UINT16_C( 2487), UINT16_C( 7638), UINT16_C( 6550), + UINT16_C( 243), UINT16_C( 12181), UINT16_C( 3983), UINT16_C( 4204) }, + { UINT16_C( 1344), UINT16_C( 18837), UINT16_C( 14633), UINT16_C( 17217)}, + INT8_C( 0), + { UINT32_C( 144007173), UINT32_C( 138516466), UINT32_C( 34039775), UINT32_C( 174450262) } }, + { { UINT32_C( 148671659), UINT32_C( 92903426), UINT32_C( 144006204), UINT32_C( 158401926) }, + { UINT16_C( 10515), UINT16_C( 2001), UINT16_C( 10205), UINT16_C( 15759), + UINT16_C( 8453), UINT16_C( 15176), UINT16_C( 16053), UINT16_C( 6945) }, + { UINT16_C( 7678), UINT16_C( 896), UINT16_C( 265), UINT16_C( 2973)}, + INT8_C( 0), + { UINT32_C( 83769525), UINT32_C( 4271349394), UINT32_C( 20751270), UINT32_C( 105078216) } }, + { { UINT32_C( 23512841), UINT32_C( 124122782), UINT32_C( 95375810), UINT32_C( 162866734) }, + { UINT16_C( 3173), UINT16_C( 2773), UINT16_C( 18470), UINT16_C( 2835), + UINT16_C( 8062), UINT16_C( 3364), UINT16_C( 11264), UINT16_C( 12468) }, + { UINT16_C( 3547), UINT16_C( 18994), UINT16_C( 3273), UINT16_C( 6367)}, + INT8_C( 3), + { UINT32_C( 4267149383), UINT32_C( 102704194), UINT32_C( 23657922), UINT32_C( 83482978) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint16x4_t v = simde_vld1_u16(test_vec[i].v); + simde_uint32x4_t r; + SIMDE_CONSTIFY_4_(simde_vmlsl_high_lane_u16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static 
const struct { + uint64_t a[2]; + uint32_t b[4]; + uint32_t v[4]; + int8_t lane; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C( 1039934347), UINT64_C( 275280831) }, + { UINT32_C( 1037015), UINT32_C( 235679), UINT32_C( 382401), UINT32_C( 303311) }, + { UINT32_C( 1100818), UINT32_C( 949066), UINT32_C( 654879), UINT32_C( 944978)}, + INT8_C( 0), + { UINT64_C(18446743653795581945), UINT64_C(18446743740094624049) } }, + { { UINT64_C( 1673630883), UINT64_C( 1187815648) }, + { UINT32_C( 1508415), UINT32_C( 759706), UINT32_C( 1215861), UINT32_C( 373592) }, + { UINT32_C( 1140856), UINT32_C( 62017), UINT32_C( 1341655), UINT32_C( 806406)}, + INT8_C( 2), + { UINT64_C(18446742444117192544), UINT64_C(18446743573665792504) } }, + { { UINT64_C( 1732619684), UINT64_C( 1823057864) }, + { UINT32_C( 916962), UINT32_C( 1578628), UINT32_C( 1232510), UINT32_C( 750619) }, + { UINT32_C( 1060614), UINT32_C( 1407908), UINT32_C( 951551), UINT32_C( 1115279)}, + INT8_C( 2), + { UINT64_C(18446742902646048290), UINT64_C(18446743361280349411) } }, + { { UINT64_C( 435430309), UINT64_C( 1755206050) }, + { UINT32_C( 1263544), UINT32_C( 1208264), UINT32_C( 282088), UINT32_C( 1576857) }, + { UINT32_C( 834152), UINT32_C( 1446343), UINT32_C( 1167738), UINT32_C( 844313)}, + INT8_C( 1), + { UINT64_C(18446743666148977741), UINT64_C(18446741794788673715) } }, + { { UINT64_C( 517870838), UINT64_C( 1805264897) }, + { UINT32_C( 392603), UINT32_C( 693883), UINT32_C( 564681), UINT32_C( 1770000) }, + { UINT32_C( 350732), UINT32_C( 1777733), UINT32_C( 98652), UINT32_C( 262293)}, + INT8_C( 1), + { UINT64_C(18446743070375374281), UINT64_C(18446740928927406513) } }, + { { UINT64_C( 1224729507), UINT64_C( 1782158252) }, + { UINT32_C( 511816), UINT32_C( 1539770), UINT32_C( 908365), UINT32_C( 940596) }, + { UINT32_C( 122510), UINT32_C( 293642), UINT32_C( 927041), UINT32_C( 1320491)}, + INT8_C( 1), + { UINT64_C(18446743808200165793), UINT64_C(18446743799293219236) } }, + { { UINT64_C( 1904006206), UINT64_C( 662780591) 
}, + { UINT32_C( 1169760), UINT32_C( 1600085), UINT32_C( 87097), UINT32_C( 1607558) }, + { UINT32_C( 112360), UINT32_C( 1461102), UINT32_C( 1127403), UINT32_C( 1753289)}, + INT8_C( 2), + { UINT64_C(18446743977420138731), UINT64_C(18446742262006620333) } }, + { { UINT64_C( 1717748601), UINT64_C( 884477975) }, + { UINT32_C( 154487), UINT32_C( 1186882), UINT32_C( 1967124), UINT32_C( 352963) }, + { UINT32_C( 1412406), UINT32_C( 1778692), UINT32_C( 1967061), UINT32_C( 1813001)}, + INT8_C( 0), + { UINT64_C(18446741297049559873), UINT64_C(18446743576066970613) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint32x4_t v = simde_vld1q_u32(test_vec[i].v); + simde_uint64x2_t r; + SIMDE_CONSTIFY_4_(simde_vmlsl_high_laneq_u32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vmlsl_high_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t b[4]; + uint32_t v[2]; + int8_t lane; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C( 807346442), UINT64_C( 390336432) }, + { UINT32_C( 1727904), UINT32_C( 1212124), UINT32_C( 729670), UINT32_C( 1999448) }, + { UINT32_C( 321407), UINT32_C( 1749330)}, + INT8_C( 1), + { UINT64_C(18446742798083276958), UINT64_C(18446740576405518208) } }, + { { UINT64_C( 1130610500), UINT64_C( 1294274127) }, + { UINT32_C( 1792777), UINT32_C( 1677015), UINT32_C( 570046), UINT32_C( 333541) }, + { UINT32_C( 960454), UINT32_C( 1978051)}, + INT8_C( 1), + { UINT64_C(18446742947260101770), UINT64_C(18446743415242717152) } }, + { { UINT64_C( 1764044054), UINT64_C( 1316201253) }, + { UINT32_C( 590075), UINT32_C( 998633), UINT32_C( 465915), UINT32_C( 50919) }, + { UINT32_C( 1611355), UINT32_C( 1506608)}, + INT8_C( 1), + { 
UINT64_C(18446743373522329350), UINT64_C(18446743998310780117) } }, + { { UINT64_C( 1691008098), UINT64_C( 171003782) }, + { UINT32_C( 1110725), UINT32_C( 28618), UINT32_C( 33748), UINT32_C( 91128) }, + { UINT32_C( 918135), UINT32_C( 884987)}, + INT8_C( 0), + { UINT64_C(18446744044415339734), UINT64_C(18446743990212749118) } }, + { { UINT64_C( 1821092036), UINT64_C( 1170774552) }, + { UINT32_C( 1433017), UINT32_C( 609507), UINT32_C( 937285), UINT32_C( 613346) }, + { UINT32_C( 1688325), UINT32_C( 219981)}, + INT8_C( 1), + { UINT64_C(18446743869345752067), UINT64_C(18446743939955859742) } }, + { { UINT64_C( 657842295), UINT64_C( 149096922) }, + { UINT32_C( 1406492), UINT32_C( 1660419), UINT32_C( 1588085), UINT32_C( 222194) }, + { UINT32_C( 1600913), UINT32_C( 436431)}, + INT8_C( 1), + { UINT64_C(18446743381277869276), UINT64_C(18446743976886298924) } }, + { { UINT64_C( 1800862766), UINT64_C( 351385912) }, + { UINT32_C( 1985028), UINT32_C( 610354), UINT32_C( 598858), UINT32_C( 634547) }, + { UINT32_C( 161297), UINT32_C( 1989972)}, + INT8_C( 0), + { UINT64_C(18446743978916415556), UINT64_C(18446743971710410069) } }, + { { UINT64_C( 660613479), UINT64_C( 1473821982) }, + { UINT32_C( 1795900), UINT32_C( 1528251), UINT32_C( 1047197), UINT32_C( 1818799) }, + { UINT32_C( 725139), UINT32_C( 1130632)}, + INT8_C( 0), + { UINT64_C(18446743315006779712), UINT64_C(18446742756301285537) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint32x2_t v = simde_vld1_u32(test_vec[i].v); + simde_uint64x2_t r; + SIMDE_CONSTIFY_2_(simde_vmlsl_high_lane_u32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_lane_s16) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_lane_u32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_laneq_u32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/modify_c.txt b/test/arm/neon/modify_c.txt new file mode 100644 index 000000000..b70c25ef8 --- /dev/null +++ b/test/arm/neon/modify_c.txt @@ -0,0 +1,83 @@ +abd.c +abdl_high.c +abs.c +addhn_high.c +cgez.c +cgtz.c +cle.c +cltz.c +copy_lane.c +cvt.c +cvt_n.c +cvtm.c +cvtp.c +div.c +dup_lane.c +eor.c +fmlal.c +fmlsl.c +ld3.c +ld4.c +max.c +maxnm.c +maxnmv.c +maxv.c +min.c +minnm.c +minnmv.c +minv.c +mla_lane.c +mls_lane.c +mmlaq.c +mull_high_lane.c +mull_high_n.c +mulx.c +mulx_lane.c +mulx_n.c +padd.c +pmax.c +pmaxnm.c +pmin.c +pminnm.c +qdmlal_lane.c +qdmlsl_lane.c +qdmull_high_lane.c +qmovun_high.c +qrdmlah.c +qrdmlah_lane.c +qrdmlsh.c +qrdmlsh_lane.c +qrdmulh_lane.c +qrshl.c +qrshrn_high_n.c +qrshrun_high_n.c +qshl_n.c +qshrn_high_n.c +qshrn_n.c +raddhn.c +raddhn_high.c +reinterpret.c +rev64.c +rshrn_high_n.c +rshrn_n.c +rsubhn.c +rsubhn_high.c +sli_n.c +st1_lane.c +st1_x2.c +st1_x3.c +st1_x4.c +st1q_x2.c +st1q_x3.c +st1q_x4.c +st2_lane.c +st3.c +st3_lane.c +st4.c +st4_lane.c +trn.c +trn1.c +trn2.c +uzp.c +uzp1.c +uzp2.c diff --git a/test/arm/neon/mul.c b/test/arm/neon/mul.c index 03a358724..9ae0f6b5f 100644 --- a/test/arm/neon/mul.c +++ b/test/arm/neon/mul.c @@ -3,6 +3,48 @@ #include "test-neon.h" #include "../../../simde/arm/neon/mul.h" +static int +test_simde_vmulh_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + simde_float16 b; + simde_float16 r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 147.875), + SIMDE_FLOAT16_VALUE( 27.609375), + SIMDE_FLOAT16_VALUE( 4082.0) }, + { SIMDE_FLOAT16_VALUE( 95.75), + SIMDE_FLOAT16_VALUE( 
206.375), + SIMDE_FLOAT16_VALUE( 19760.0) }, + { SIMDE_FLOAT16_VALUE( -112.1875), + SIMDE_FLOAT16_VALUE( 207.0) , + SIMDE_FLOAT16_VALUE( -23216.0) }, + { SIMDE_FLOAT16_VALUE( -128.0) , + SIMDE_FLOAT16_VALUE( 144.625) , + SIMDE_FLOAT16_VALUE( -18512.0) }, + { SIMDE_FLOAT16_VALUE( 183.375), + SIMDE_FLOAT16_VALUE( -92.3125), + SIMDE_FLOAT16_VALUE( -16928.0) }, + { SIMDE_FLOAT16_VALUE( 252.875), + SIMDE_FLOAT16_VALUE( -217.125), + SIMDE_FLOAT16_VALUE( -54912.0) }, + { SIMDE_FLOAT16_VALUE( -237.875), + SIMDE_FLOAT16_VALUE( -14.34375), + SIMDE_FLOAT16_VALUE( 3412.0) }, + { SIMDE_FLOAT16_VALUE( -78.125), + SIMDE_FLOAT16_VALUE( -206.125), + SIMDE_FLOAT16_VALUE( 16104.0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 r = simde_vmulh_f16(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +} + static int test_simde_vmul_f16 (SIMDE_MUNIT_TEST_ARGS) { struct { @@ -1124,6 +1166,7 @@ test_simde_x_vmulq_u64 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vmulh_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_f64) diff --git a/test/arm/neon/mul_lane.c b/test/arm/neon/mul_lane.c index b6adb4bd3..e069faef1 100644 --- a/test/arm/neon/mul_lane.c +++ b/test/arm/neon/mul_lane.c @@ -7,6 +7,49 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +static int +test_simde_vmul_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[4]; + int lane; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-2.2), SIMDE_FLOAT16_VALUE(-6.4), SIMDE_FLOAT16_VALUE(-8.8), SIMDE_FLOAT16_VALUE(-3.0) }, + { SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(-3.0), SIMDE_FLOAT16_VALUE(-6.1), SIMDE_FLOAT16_VALUE(6.2) }, + INT8_C(2), + { SIMDE_FLOAT16_VALUE(13.420), SIMDE_FLOAT16_VALUE(39.040), SIMDE_FLOAT16_VALUE(53.680), SIMDE_FLOAT16_VALUE(18.300) } }, + { { 
SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(6.4) }, + { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(-0.9), SIMDE_FLOAT16_VALUE(-7.8), SIMDE_FLOAT16_VALUE(-0.8) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(4.400), SIMDE_FLOAT16_VALUE(-0.160), SIMDE_FLOAT16_VALUE(-7.280), SIMDE_FLOAT16_VALUE(-5.120) } }, + { { SIMDE_FLOAT16_VALUE(9.2), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(5.3) }, + { SIMDE_FLOAT16_VALUE(3.4), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(1.3) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(11.960), SIMDE_FLOAT16_VALUE(4.290), SIMDE_FLOAT16_VALUE(-11.830), SIMDE_FLOAT16_VALUE(6.890) } }, + { { SIMDE_FLOAT16_VALUE(4.7), SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(-3.3) }, + { SIMDE_FLOAT16_VALUE(-4.7), SIMDE_FLOAT16_VALUE(-9.8), SIMDE_FLOAT16_VALUE(5.8), SIMDE_FLOAT16_VALUE(0.7) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(3.290), SIMDE_FLOAT16_VALUE(-1.330), SIMDE_FLOAT16_VALUE(-0.350), SIMDE_FLOAT16_VALUE(-2.310) } }, + { { SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(9.7) }, + { SIMDE_FLOAT16_VALUE(-6.3), SIMDE_FLOAT16_VALUE(1.8), SIMDE_FLOAT16_VALUE(9.6), SIMDE_FLOAT16_VALUE(1.6) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(-15.360), SIMDE_FLOAT16_VALUE(-3.360), SIMDE_FLOAT16_VALUE(-8.800), SIMDE_FLOAT16_VALUE(15.520) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + + simde_float16x4_t r; + SIMDE_CONSTIFY_4_(simde_vmul_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vmul_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1193,6 +1236,85 @@ 
test_simde_vmulq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmulq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + simde_float16 b[8]; + int lane; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(5.9), SIMDE_FLOAT16_VALUE(-1.8), + SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(9.7) }, + { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(-0.3), SIMDE_FLOAT16_VALUE(-6.4), SIMDE_FLOAT16_VALUE(0.1), + SIMDE_FLOAT16_VALUE(3.6), SIMDE_FLOAT16_VALUE(-2.8), SIMDE_FLOAT16_VALUE(-8.4), SIMDE_FLOAT16_VALUE(2.2) }, + INT8_C(6), + { SIMDE_FLOAT16_VALUE(38.640), SIMDE_FLOAT16_VALUE(-5.040), SIMDE_FLOAT16_VALUE(-49.560), SIMDE_FLOAT16_VALUE(15.120), + SIMDE_FLOAT16_VALUE(-0.000), SIMDE_FLOAT16_VALUE(20.160), SIMDE_FLOAT16_VALUE(15.960), SIMDE_FLOAT16_VALUE(-81.480) } }, + { { SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(3.1), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(-8.3), + SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(1.8), SIMDE_FLOAT16_VALUE(-7.8), SIMDE_FLOAT16_VALUE(-7.3) }, + { SIMDE_FLOAT16_VALUE(-3.5), SIMDE_FLOAT16_VALUE(5.5), SIMDE_FLOAT16_VALUE(-0.1), SIMDE_FLOAT16_VALUE(2.4), + SIMDE_FLOAT16_VALUE(9.5), SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(0.9), SIMDE_FLOAT16_VALUE(5.2) }, + INT8_C(1), + { SIMDE_FLOAT16_VALUE(14.300), SIMDE_FLOAT16_VALUE(17.050), SIMDE_FLOAT16_VALUE(17.600), SIMDE_FLOAT16_VALUE(-45.650), + SIMDE_FLOAT16_VALUE(29.700), SIMDE_FLOAT16_VALUE(9.900), SIMDE_FLOAT16_VALUE(-42.900), SIMDE_FLOAT16_VALUE(-40.150) } }, + { { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(2.2), SIMDE_FLOAT16_VALUE(6.7), SIMDE_FLOAT16_VALUE(-6.7), + SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-5.1), SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(-8.7) }, + { SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(7.3), SIMDE_FLOAT16_VALUE(6.6), + SIMDE_FLOAT16_VALUE(-0.3), 
SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(0.2) }, + INT8_C(5), + { SIMDE_FLOAT16_VALUE(-1.240), SIMDE_FLOAT16_VALUE(13.640), SIMDE_FLOAT16_VALUE(41.540), SIMDE_FLOAT16_VALUE(-41.540), + SIMDE_FLOAT16_VALUE(4.960), SIMDE_FLOAT16_VALUE(-31.620), SIMDE_FLOAT16_VALUE(6.200), SIMDE_FLOAT16_VALUE(-53.940) } }, + { { SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(-3.3), SIMDE_FLOAT16_VALUE(-0.3), SIMDE_FLOAT16_VALUE(1.4), + SIMDE_FLOAT16_VALUE(-3.7), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.8) }, + { SIMDE_FLOAT16_VALUE(-4.1), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-6.6), SIMDE_FLOAT16_VALUE(1.6), + SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(2.0) }, + INT8_C(5), + { SIMDE_FLOAT16_VALUE(12.900), SIMDE_FLOAT16_VALUE(-28.380), SIMDE_FLOAT16_VALUE(-2.580), SIMDE_FLOAT16_VALUE(12.040), + SIMDE_FLOAT16_VALUE(-31.820), SIMDE_FLOAT16_VALUE(9.460), SIMDE_FLOAT16_VALUE(5.160), SIMDE_FLOAT16_VALUE(6.880) } }, + { { SIMDE_FLOAT16_VALUE(-0.3), SIMDE_FLOAT16_VALUE(9.6), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(-7.4), + SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(-9.6) }, + { SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(-9.0), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(7.0), + SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(9.6), SIMDE_FLOAT16_VALUE(-1.9), SIMDE_FLOAT16_VALUE(3.1) }, + INT8_C(6), + { SIMDE_FLOAT16_VALUE(0.570), SIMDE_FLOAT16_VALUE(-18.240), SIMDE_FLOAT16_VALUE(-12.920), SIMDE_FLOAT16_VALUE(14.060), + SIMDE_FLOAT16_VALUE(-11.780), SIMDE_FLOAT16_VALUE(6.460), SIMDE_FLOAT16_VALUE(-12.350), SIMDE_FLOAT16_VALUE(18.240) } }, + { { SIMDE_FLOAT16_VALUE(-2.6), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-1.0), + SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-1.4) }, + { SIMDE_FLOAT16_VALUE(6.5), 
SIMDE_FLOAT16_VALUE(-8.3), SIMDE_FLOAT16_VALUE(8.1), SIMDE_FLOAT16_VALUE(0.2), + SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(7.9), SIMDE_FLOAT16_VALUE(-8.9) }, + INT8_C(6), + { SIMDE_FLOAT16_VALUE(-20.540), SIMDE_FLOAT16_VALUE(26.070), SIMDE_FLOAT16_VALUE(26.070), SIMDE_FLOAT16_VALUE(-7.900), + SIMDE_FLOAT16_VALUE(-18.960), SIMDE_FLOAT16_VALUE(61.620), SIMDE_FLOAT16_VALUE(6.320), SIMDE_FLOAT16_VALUE(-11.060) } }, + { { SIMDE_FLOAT16_VALUE(-3.9), SIMDE_FLOAT16_VALUE(-6.7), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(1.3), + SIMDE_FLOAT16_VALUE(9.4), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(-6.5) }, + { SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(-7.4), + SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(-8.0), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(-0.2) }, + INT8_C(4), + { SIMDE_FLOAT16_VALUE(30.030), SIMDE_FLOAT16_VALUE(51.590), SIMDE_FLOAT16_VALUE(-3.080), SIMDE_FLOAT16_VALUE(-10.010), + SIMDE_FLOAT16_VALUE(-72.380), SIMDE_FLOAT16_VALUE(40.810), SIMDE_FLOAT16_VALUE(35.420), SIMDE_FLOAT16_VALUE(50.050) } }, + { { SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-3.5), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(2.3), + SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-9.1), SIMDE_FLOAT16_VALUE(-9.4), SIMDE_FLOAT16_VALUE(0.9) }, + { SIMDE_FLOAT16_VALUE(-1.4), SIMDE_FLOAT16_VALUE(9.7), SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(9.1), + SIMDE_FLOAT16_VALUE(-7.9), SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(9.7), SIMDE_FLOAT16_VALUE(2.4) }, + INT8_C(1), + { SIMDE_FLOAT16_VALUE(13.580), SIMDE_FLOAT16_VALUE(-33.950), SIMDE_FLOAT16_VALUE(69.840), SIMDE_FLOAT16_VALUE(22.310), + SIMDE_FLOAT16_VALUE(-14.550), SIMDE_FLOAT16_VALUE(-88.270), SIMDE_FLOAT16_VALUE(-91.180), SIMDE_FLOAT16_VALUE(8.730) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + 
simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + + simde_float16x8_t r; + SIMDE_CONSTIFY_8_(simde_vmulq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, a, b); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vmulq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1755,6 +1877,46 @@ test_simde_vmuld_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmulh_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t b[4]; + int lane; + simde_float16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(-0.9), + { SIMDE_FLOAT16_VALUE(-6.7), SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(11.3), SIMDE_FLOAT16_VALUE(-6.7) }, + INT8_C(0), + SIMDE_FLOAT16_VALUE(6.030) }, + { SIMDE_FLOAT16_VALUE(4.5), + { SIMDE_FLOAT16_VALUE(-1.1), SIMDE_FLOAT16_VALUE(7.4), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(13.1) }, + INT8_C(2), + SIMDE_FLOAT16_VALUE(-29.250) }, + { SIMDE_FLOAT16_VALUE(-3.7), + { SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(13.7), SIMDE_FLOAT16_VALUE(13.3), SIMDE_FLOAT16_VALUE(14.0) }, + INT8_C(1), + SIMDE_FLOAT16_VALUE(-50.690) }, + { SIMDE_FLOAT16_VALUE(13.3), + { SIMDE_FLOAT16_VALUE(11.7), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-10.1), SIMDE_FLOAT16_VALUE(-4.4) }, + INT8_C(1), + SIMDE_FLOAT16_VALUE(115.710) }, + { SIMDE_FLOAT16_VALUE(-9.9), + { SIMDE_FLOAT16_VALUE(3.6), SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-3.5), SIMDE_FLOAT16_VALUE(14.4) }, + INT8_C(3), + SIMDE_FLOAT16_VALUE(-142.560) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16_t r; + SIMDE_CONSTIFY_4_(simde_vmulh_lane_f16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, test_vec[i].a, b); + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +} + static int test_simde_vmuls_lane_f32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1824,6 +1986,51 @@ test_simde_vmuls_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmulh_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a; + simde_float16_t b[8]; + int lane; + simde_float16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(9.2), + { SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(-8.2), + SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-13.3), SIMDE_FLOAT16_VALUE(3.9) }, + INT8_C(1), + SIMDE_FLOAT16_VALUE(-19.320) }, + { SIMDE_FLOAT16_VALUE(7.7), + { SIMDE_FLOAT16_VALUE(-14.5), SIMDE_FLOAT16_VALUE(-4.9), SIMDE_FLOAT16_VALUE(13.4), SIMDE_FLOAT16_VALUE(14.6), + SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-0.6), SIMDE_FLOAT16_VALUE(-7.4), SIMDE_FLOAT16_VALUE(6.6) }, + INT8_C(7), + SIMDE_FLOAT16_VALUE(50.820) }, + { SIMDE_FLOAT16_VALUE(8.5), + { SIMDE_FLOAT16_VALUE(5.6), SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(5.9), SIMDE_FLOAT16_VALUE(-11.4), + SIMDE_FLOAT16_VALUE(-11.6), SIMDE_FLOAT16_VALUE(7.8), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(-4.8) }, + INT8_C(6), + SIMDE_FLOAT16_VALUE(-22.950) }, + { SIMDE_FLOAT16_VALUE(-4.8), + { SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(-3.2), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(-5.4), + SIMDE_FLOAT16_VALUE(-12.8), SIMDE_FLOAT16_VALUE(-8.3), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(-2.3) }, + INT8_C(6), + SIMDE_FLOAT16_VALUE(-47.520) }, + { SIMDE_FLOAT16_VALUE(9.9), + { SIMDE_FLOAT16_VALUE(-5.8), SIMDE_FLOAT16_VALUE(4.5), SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(12.2), + SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(-4.2), SIMDE_FLOAT16_VALUE(-4.2), SIMDE_FLOAT16_VALUE(-7.1) }, + INT8_C(0), + SIMDE_FLOAT16_VALUE(-57.420) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16_t r; + SIMDE_CONSTIFY_8_(simde_vmulh_laneq_f16, r, 
(HEDLEY_UNREACHABLE(), r), test_vec[i].lane, test_vec[i].a, b); + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +} + static int test_simde_vmuls_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1893,6 +2100,54 @@ test_simde_vmuls_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vmul_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 b[8]; + int lane; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(-2.7), SIMDE_FLOAT16_VALUE(0.1), SIMDE_FLOAT16_VALUE(6.2) }, + { SIMDE_FLOAT16_VALUE(-4.0), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-5.2), SIMDE_FLOAT16_VALUE(6.9), + SIMDE_FLOAT16_VALUE(8.4), SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(-8.1), SIMDE_FLOAT16_VALUE(-9.4) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(-15.870), SIMDE_FLOAT16_VALUE(-18.630), SIMDE_FLOAT16_VALUE(0.690), SIMDE_FLOAT16_VALUE(42.780) } }, + { { SIMDE_FLOAT16_VALUE(-7.8), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(8.4), SIMDE_FLOAT16_VALUE(-2.2) }, + { SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(-5.0), SIMDE_FLOAT16_VALUE(-2.1), SIMDE_FLOAT16_VALUE(-6.0), + SIMDE_FLOAT16_VALUE(-7.9), SIMDE_FLOAT16_VALUE(-9.2), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(-9.1) }, + INT8_C(4), + { SIMDE_FLOAT16_VALUE(61.620), SIMDE_FLOAT16_VALUE(-42.660), SIMDE_FLOAT16_VALUE(-66.360), SIMDE_FLOAT16_VALUE(17.380) } }, + { { SIMDE_FLOAT16_VALUE(7.7), SIMDE_FLOAT16_VALUE(-1.8), SIMDE_FLOAT16_VALUE(-6.4), SIMDE_FLOAT16_VALUE(-1.7) }, + { SIMDE_FLOAT16_VALUE(7.6), SIMDE_FLOAT16_VALUE(-3.0), SIMDE_FLOAT16_VALUE(-4.4), SIMDE_FLOAT16_VALUE(-8.0), + SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(-7.9), SIMDE_FLOAT16_VALUE(6.1), SIMDE_FLOAT16_VALUE(-5.3) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(-61.600), SIMDE_FLOAT16_VALUE(14.400), SIMDE_FLOAT16_VALUE(51.200), SIMDE_FLOAT16_VALUE(13.600) } }, + { { SIMDE_FLOAT16_VALUE(-5.9), SIMDE_FLOAT16_VALUE(0.9), SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(9.1) }, + 
{ SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(-1.4), SIMDE_FLOAT16_VALUE(3.7), + SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.5), SIMDE_FLOAT16_VALUE(-0.5), SIMDE_FLOAT16_VALUE(6.5) }, + INT8_C(2), + { SIMDE_FLOAT16_VALUE(8.260), SIMDE_FLOAT16_VALUE(-1.260), SIMDE_FLOAT16_VALUE(-3.220), SIMDE_FLOAT16_VALUE(-12.740) } }, + { { SIMDE_FLOAT16_VALUE(1.6), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(-4.1), SIMDE_FLOAT16_VALUE(-1.1) }, + { SIMDE_FLOAT16_VALUE(-4.6), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(-4.3), SIMDE_FLOAT16_VALUE(-3.0), + SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(2.3), SIMDE_FLOAT16_VALUE(-0.6) }, + INT8_C(3), + { SIMDE_FLOAT16_VALUE(-4.800), SIMDE_FLOAT16_VALUE(-19.500), SIMDE_FLOAT16_VALUE(12.300), SIMDE_FLOAT16_VALUE(3.300) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + + simde_float16x4_t r; + SIMDE_CONSTIFY_8_(simde_vmul_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vmul_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -2042,6 +2297,7 @@ test_simde_vmul_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_s16) @@ -2062,6 +2318,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_s16) @@ -2073,7 +2330,10 @@ 
SIMDE_TEST_FUNC_LIST_ENTRY(vmuld_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmuld_laneq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmuls_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmuls_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vmulh_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmulh_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vmul_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_laneq_f64) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/neg.c b/test/arm/neon/neg.c index fcea389f4..0981e4092 100644 --- a/test/arm/neon/neg.c +++ b/test/arm/neon/neg.c @@ -3,6 +3,123 @@ #include "test-neon.h" #include "../../../simde/arm/neon/neg.h" +static int +test_simde_vnegh_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + simde_float16 r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(5.08), + SIMDE_FLOAT16_VALUE(-5.08) }, + { SIMDE_FLOAT16_VALUE(-9.37), + SIMDE_FLOAT16_VALUE(9.37) }, + { SIMDE_FLOAT16_VALUE(4.40), + SIMDE_FLOAT16_VALUE(-4.40) }, + { SIMDE_FLOAT16_VALUE(7.96), + SIMDE_FLOAT16_VALUE(-7.96) }, + { SIMDE_FLOAT16_VALUE(-0.09), + SIMDE_FLOAT16_VALUE(0.09) }, + { SIMDE_FLOAT16_VALUE(-7.21), + SIMDE_FLOAT16_VALUE(7.21) }, + { SIMDE_FLOAT16_VALUE(9.94), + SIMDE_FLOAT16_VALUE(-9.94) }, + { SIMDE_FLOAT16_VALUE(9.11), + SIMDE_FLOAT16_VALUE(-9.11) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t r = simde_vnegh_f16(test_vec[i].a); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +} + +static int +test_simde_vneg_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(3.76), SIMDE_FLOAT16_VALUE(4.26), SIMDE_FLOAT16_VALUE(0.04), SIMDE_FLOAT16_VALUE(-1.27) }, + { SIMDE_FLOAT16_VALUE(-3.76), SIMDE_FLOAT16_VALUE(-4.26), SIMDE_FLOAT16_VALUE(-0.04), SIMDE_FLOAT16_VALUE(1.27) } }, + { { SIMDE_FLOAT16_VALUE(-4.66), SIMDE_FLOAT16_VALUE(-2.63), SIMDE_FLOAT16_VALUE(4.43), SIMDE_FLOAT16_VALUE(-1.51) }, + { 
SIMDE_FLOAT16_VALUE(4.66), SIMDE_FLOAT16_VALUE(2.63), SIMDE_FLOAT16_VALUE(-4.43), SIMDE_FLOAT16_VALUE(1.51) } }, + { { SIMDE_FLOAT16_VALUE(0.13), SIMDE_FLOAT16_VALUE(-2.36), SIMDE_FLOAT16_VALUE(2.44), SIMDE_FLOAT16_VALUE(1.42) }, + { SIMDE_FLOAT16_VALUE(-0.13), SIMDE_FLOAT16_VALUE(2.36), SIMDE_FLOAT16_VALUE(-2.44), SIMDE_FLOAT16_VALUE(-1.42) } }, + { { SIMDE_FLOAT16_VALUE(-3.22), SIMDE_FLOAT16_VALUE(-6.50), SIMDE_FLOAT16_VALUE(-1.12), SIMDE_FLOAT16_VALUE(-7.58) }, + { SIMDE_FLOAT16_VALUE(3.22), SIMDE_FLOAT16_VALUE(6.50), SIMDE_FLOAT16_VALUE(1.12), SIMDE_FLOAT16_VALUE(7.58) } }, + { { SIMDE_FLOAT16_VALUE(3.07), SIMDE_FLOAT16_VALUE(-4.77), SIMDE_FLOAT16_VALUE(6.72), SIMDE_FLOAT16_VALUE(-2.02) }, + { SIMDE_FLOAT16_VALUE(-3.07), SIMDE_FLOAT16_VALUE(4.77), SIMDE_FLOAT16_VALUE(-6.72), SIMDE_FLOAT16_VALUE(2.02) } }, + { { SIMDE_FLOAT16_VALUE(-7.83), SIMDE_FLOAT16_VALUE(-9.99), SIMDE_FLOAT16_VALUE(-8.44), SIMDE_FLOAT16_VALUE(9.22) }, + { SIMDE_FLOAT16_VALUE(7.83), SIMDE_FLOAT16_VALUE(9.99), SIMDE_FLOAT16_VALUE(8.44), SIMDE_FLOAT16_VALUE(-9.22) } }, + { { SIMDE_FLOAT16_VALUE(7.97), SIMDE_FLOAT16_VALUE(4.72), SIMDE_FLOAT16_VALUE(0.39), SIMDE_FLOAT16_VALUE(6.01) }, + { SIMDE_FLOAT16_VALUE(-7.97), SIMDE_FLOAT16_VALUE(-4.72), SIMDE_FLOAT16_VALUE(-0.39), SIMDE_FLOAT16_VALUE(-6.01) } }, + { { SIMDE_FLOAT16_VALUE(4.00), SIMDE_FLOAT16_VALUE(7.89), SIMDE_FLOAT16_VALUE(3.62), SIMDE_FLOAT16_VALUE(-0.22) }, + { SIMDE_FLOAT16_VALUE(-4.00), SIMDE_FLOAT16_VALUE(-7.89), SIMDE_FLOAT16_VALUE(-3.62), SIMDE_FLOAT16_VALUE(0.22) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t r = simde_vneg_f16(a); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_vnegq_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE(3.86), 
SIMDE_FLOAT16_VALUE(-7.04), SIMDE_FLOAT16_VALUE(-0.07), SIMDE_FLOAT16_VALUE(-5.19), + SIMDE_FLOAT16_VALUE(-9.22), SIMDE_FLOAT16_VALUE(-9.12), SIMDE_FLOAT16_VALUE(7.75), SIMDE_FLOAT16_VALUE(5.97) }, + { SIMDE_FLOAT16_VALUE(-3.86), SIMDE_FLOAT16_VALUE(7.04), SIMDE_FLOAT16_VALUE(0.07), SIMDE_FLOAT16_VALUE(5.19), + SIMDE_FLOAT16_VALUE(9.22), SIMDE_FLOAT16_VALUE(9.12), SIMDE_FLOAT16_VALUE(-7.75), SIMDE_FLOAT16_VALUE(-5.97) } }, + { { SIMDE_FLOAT16_VALUE(-8.27), SIMDE_FLOAT16_VALUE(-9.57), SIMDE_FLOAT16_VALUE(-5.62), SIMDE_FLOAT16_VALUE(9.12), + SIMDE_FLOAT16_VALUE(9.92), SIMDE_FLOAT16_VALUE(9.39), SIMDE_FLOAT16_VALUE(2.30), SIMDE_FLOAT16_VALUE(6.87) }, + { SIMDE_FLOAT16_VALUE(8.27), SIMDE_FLOAT16_VALUE(9.57), SIMDE_FLOAT16_VALUE(5.62), SIMDE_FLOAT16_VALUE(-9.12), + SIMDE_FLOAT16_VALUE(-9.92), SIMDE_FLOAT16_VALUE(-9.39), SIMDE_FLOAT16_VALUE(-2.30), SIMDE_FLOAT16_VALUE(-6.87) } }, + { { SIMDE_FLOAT16_VALUE(8.40), SIMDE_FLOAT16_VALUE(5.82), SIMDE_FLOAT16_VALUE(4.38), SIMDE_FLOAT16_VALUE(-5.40), + SIMDE_FLOAT16_VALUE(-6.07), SIMDE_FLOAT16_VALUE(4.66), SIMDE_FLOAT16_VALUE(0.72), SIMDE_FLOAT16_VALUE(-5.19) }, + { SIMDE_FLOAT16_VALUE(-8.40), SIMDE_FLOAT16_VALUE(-5.82), SIMDE_FLOAT16_VALUE(-4.38), SIMDE_FLOAT16_VALUE(5.40), + SIMDE_FLOAT16_VALUE(6.07), SIMDE_FLOAT16_VALUE(-4.66), SIMDE_FLOAT16_VALUE(-0.72), SIMDE_FLOAT16_VALUE(5.19) } }, + { { SIMDE_FLOAT16_VALUE(-5.48), SIMDE_FLOAT16_VALUE(-2.52), SIMDE_FLOAT16_VALUE(-7.59), SIMDE_FLOAT16_VALUE(-2.00), + SIMDE_FLOAT16_VALUE(-0.60), SIMDE_FLOAT16_VALUE(-5.50), SIMDE_FLOAT16_VALUE(-3.14), SIMDE_FLOAT16_VALUE(-7.01) }, + { SIMDE_FLOAT16_VALUE(5.48), SIMDE_FLOAT16_VALUE(2.52), SIMDE_FLOAT16_VALUE(7.59), SIMDE_FLOAT16_VALUE(2.00), + SIMDE_FLOAT16_VALUE(0.60), SIMDE_FLOAT16_VALUE(5.50), SIMDE_FLOAT16_VALUE(3.14), SIMDE_FLOAT16_VALUE(7.01) } }, + { { SIMDE_FLOAT16_VALUE(7.90), SIMDE_FLOAT16_VALUE(7.99), SIMDE_FLOAT16_VALUE(5.87), SIMDE_FLOAT16_VALUE(-5.79), + SIMDE_FLOAT16_VALUE(8.13), SIMDE_FLOAT16_VALUE(1.99), 
SIMDE_FLOAT16_VALUE(5.19), SIMDE_FLOAT16_VALUE(-2.41) }, + { SIMDE_FLOAT16_VALUE(-7.90), SIMDE_FLOAT16_VALUE(-7.99), SIMDE_FLOAT16_VALUE(-5.87), SIMDE_FLOAT16_VALUE(5.79), + SIMDE_FLOAT16_VALUE(-8.13), SIMDE_FLOAT16_VALUE(-1.99), SIMDE_FLOAT16_VALUE(-5.19), SIMDE_FLOAT16_VALUE(2.41) } }, + { { SIMDE_FLOAT16_VALUE(8.20), SIMDE_FLOAT16_VALUE(-4.45), SIMDE_FLOAT16_VALUE(5.77), SIMDE_FLOAT16_VALUE(3.26), + SIMDE_FLOAT16_VALUE(-8.62), SIMDE_FLOAT16_VALUE(5.18), SIMDE_FLOAT16_VALUE(-2.97), SIMDE_FLOAT16_VALUE(-6.39) }, + { SIMDE_FLOAT16_VALUE(-8.20), SIMDE_FLOAT16_VALUE(4.45), SIMDE_FLOAT16_VALUE(-5.77), SIMDE_FLOAT16_VALUE(-3.26), + SIMDE_FLOAT16_VALUE(8.62), SIMDE_FLOAT16_VALUE(-5.18), SIMDE_FLOAT16_VALUE(2.97), SIMDE_FLOAT16_VALUE(6.39) } }, + { { SIMDE_FLOAT16_VALUE(4.61), SIMDE_FLOAT16_VALUE(7.91), SIMDE_FLOAT16_VALUE(-4.45), SIMDE_FLOAT16_VALUE(4.55), + SIMDE_FLOAT16_VALUE(-7.34), SIMDE_FLOAT16_VALUE(8.67), SIMDE_FLOAT16_VALUE(6.04), SIMDE_FLOAT16_VALUE(0.69) }, + { SIMDE_FLOAT16_VALUE(-4.61), SIMDE_FLOAT16_VALUE(-7.91), SIMDE_FLOAT16_VALUE(4.45), SIMDE_FLOAT16_VALUE(-4.55), + SIMDE_FLOAT16_VALUE(7.34), SIMDE_FLOAT16_VALUE(-8.67), SIMDE_FLOAT16_VALUE(-6.04), SIMDE_FLOAT16_VALUE(-0.69) } }, + { { SIMDE_FLOAT16_VALUE(3.21), SIMDE_FLOAT16_VALUE(-0.24), SIMDE_FLOAT16_VALUE(2.01), SIMDE_FLOAT16_VALUE(-4.17), + SIMDE_FLOAT16_VALUE(-2.50), SIMDE_FLOAT16_VALUE(2.98), SIMDE_FLOAT16_VALUE(0.42), SIMDE_FLOAT16_VALUE(1.50) }, + { SIMDE_FLOAT16_VALUE(-3.21), SIMDE_FLOAT16_VALUE(0.24), SIMDE_FLOAT16_VALUE(-2.01), SIMDE_FLOAT16_VALUE(4.17), + SIMDE_FLOAT16_VALUE(2.50), SIMDE_FLOAT16_VALUE(-2.98), SIMDE_FLOAT16_VALUE(-0.42), SIMDE_FLOAT16_VALUE(-1.50) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t r = simde_vnegq_f16(a); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +} + static int test_simde_vneg_f32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -569,6 +686,8 @@ test_simde_vnegq_s64 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vnegh_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vneg_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s8) @@ -576,6 +695,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_s8) diff --git a/test/arm/neon/qdmlal.c b/test/arm/neon/qdmlal.c new file mode 100644 index 000000000..d33e449d1 --- /dev/null +++ b/test/arm/neon/qdmlal.c @@ -0,0 +1,224 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlal + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlal.h" + +static int +test_simde_vqdmlalh_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int16_t b[1]; + int16_t c[1]; + int32_t r[1]; + } test_vec[] = { + { { -INT32_C( 6191349) }, + { INT16_C( 8935) }, + { -INT16_C( 976) }, + { -INT32_C( 23632469) } }, + { { -INT32_C( 8133285) }, + { INT16_C( 6661) }, + { INT16_C( 904) }, + { INT32_C( 3909803) } }, + { { -INT32_C( 6622410) }, + { -INT16_C( 9110) }, + { -INT16_C( 9860) }, + { INT32_C( 173026790) } }, + { { INT32_C( 2865485) }, + { INT16_C( 139) }, + { -INT16_C( 8277) }, + { INT32_C( 564479) } }, + { { -INT32_C( 2994139) }, + { INT16_C( 4971) }, + { INT16_C( 7745) }, + { INT32_C( 74006651) } }, + { { INT32_C( 1073049) }, + { -INT16_C( 3760) }, + { INT16_C( 9171) }, + { -INT32_C( 67892871) } }, + { { INT32_C( 4306165) }, + { -INT16_C( 3518) }, + { INT16_C( 2822) }, + { -INT32_C( 15549427) } }, + { { INT32_C( 2683498) }, + { INT16_C( 6764) }, + { -INT16_C( 707) }, + { -INT32_C( 6880798) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t r = simde_vqdmlalh_s16(test_vec[i].a[0], 
test_vec[i].b[0], test_vec[i].c[0]); + + simde_assert_equal_i32(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlals_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int32_t b[1]; + int32_t c[1]; + int64_t r[1]; + } test_vec[] = { + { { INT64_C( 994701330) }, + { -INT32_C( 8142695) }, + { -INT32_C( 6476313) }, + { INT64_C(105470277668400) } }, + { { -INT64_C( 250315414) }, + { -INT32_C( 7860762) }, + { -INT32_C( 6684612) }, + { INT64_C(105092037673274) } }, + { { -INT64_C( 388043893) }, + { -INT32_C( 5044219) }, + { INT32_C( 105388) }, + { -INT64_C( 1063588347837) } }, + { { INT64_C( 756231599) }, + { INT32_C( 6764338) }, + { INT32_C( 8602282) }, + { INT64_C(116378242270231) } }, + { { INT64_C( 653109401) }, + { INT32_C( 4179167) }, + { -INT32_C( 486101) }, + { -INT64_C( 4062341406333) } }, + { { -INT64_C( 274319077) }, + { -INT32_C( 6189685) }, + { INT32_C( 9485006) }, + { -INT64_C(117418673045297) } }, + { { INT64_C( 789989051) }, + { INT32_C( 4846033) }, + { -INT32_C( 6660847) }, + { -INT64_C( 64556578750851) } }, + { { -INT64_C( 835469905) }, + { INT32_C( 2995714) }, + { -INT32_C( 3814223) }, + { -INT64_C( 22853477950349) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t r = simde_vqdmlals_s32(test_vec[i].a[0], test_vec[i].b[0], test_vec[i].c[0]); + + simde_assert_equal_i64(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlal_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t c[4]; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 47994), -INT32_C( 20778), INT32_C( 33097), -INT32_C( 10928) }, + { -INT16_C( 2031), -INT16_C( 4604), -INT16_C( 7758), INT16_C( 1292) }, + { -INT16_C( 268), -INT16_C( 6686), INT16_C( 8116), -INT16_C( 337) }, + { INT32_C( 1136610), INT32_C( 61543910), -INT32_C( 125894759), -INT32_C( 881736) } }, + { { -INT32_C( 78165), -INT32_C( 27105), INT32_C( 1998), -INT32_C( 94511) }, + 
{ -INT16_C( 4801), -INT16_C( 5450), -INT16_C( 9223), INT16_C( 3371) }, + { -INT16_C( 8233), -INT16_C( 4388), INT16_C( 9382), -INT16_C( 258) }, + { INT32_C( 78975101), INT32_C( 47802095), -INT32_C( 173058374), -INT32_C( 1833947) } }, + { { -INT32_C( 43334), -INT32_C( 41712), INT32_C( 44640), -INT32_C( 92818) }, + { INT16_C( 4906), -INT16_C( 4840), -INT16_C( 749), -INT16_C( 6353) }, + { -INT16_C( 3678), INT16_C( 1558), INT16_C( 9974), -INT16_C( 6944) }, + { -INT32_C( 36131870), -INT32_C( 15123152), -INT32_C( 14896412), INT32_C( 88137646) } }, + { { -INT32_C( 22346), INT32_C( 40130), INT32_C( 89898), -INT32_C( 1442) }, + { INT16_C( 832), INT16_C( 1545), INT16_C( 7545), -INT16_C( 37) }, + { -INT16_C( 3717), INT16_C( 8015), INT16_C( 1913), -INT16_C( 4544) }, + { -INT32_C( 6207434), INT32_C( 24806480), INT32_C( 28957068), INT32_C( 334814) } }, + { { INT32_C( 25579), -INT32_C( 44904), INT32_C( 98978), -INT32_C( 37785) }, + { -INT16_C( 5427), -INT16_C( 9421), INT16_C( 4768), INT16_C( 287) }, + { -INT16_C( 3431), -INT16_C( 5785), INT16_C( 5695), INT16_C( 5829) }, + { INT32_C( 37265653), INT32_C( 108956066), INT32_C( 54406498), INT32_C( 3308061) } }, + { { INT32_C( 97923), INT32_C( 92399), INT32_C( 89588), -INT32_C( 79825) }, + { INT16_C( 9641), INT16_C( 7675), -INT16_C( 7751), INT16_C( 7520) }, + { INT16_C( 4626), INT16_C( 5458), -INT16_C( 7529), -INT16_C( 9852) }, + { INT32_C( 89296455), INT32_C( 83872699), INT32_C( 116804146), -INT32_C( 148253905) } }, + { { INT32_C( 98862), -INT32_C( 1896), INT32_C( 65060), INT32_C( 36938) }, + { -INT16_C( 2865), -INT16_C( 9424), -INT16_C( 8867), -INT16_C( 3713) }, + { -INT16_C( 9160), -INT16_C( 7328), -INT16_C( 708), INT16_C( 2633) }, + { INT32_C( 52585662), INT32_C( 138116248), INT32_C( 12620732), -INT32_C( 19515720) } }, + { { INT32_C( 78126), INT32_C( 7840), INT32_C( 80240), -INT32_C( 68184) }, + { INT16_C( 8642), -INT16_C( 579), INT16_C( 2963), INT16_C( 9252) }, + { -INT16_C( 7314), -INT16_C( 5230), INT16_C( 8688), INT16_C( 5749) }, 
+ { -INT32_C( 126337050), INT32_C( 6064180), INT32_C( 51565328), INT32_C( 106311312) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); + simde_int32x4_t r = simde_vqdmlal_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t c[2]; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 845205397), INT64_C( 71618730) }, + { -INT32_C( 8506110), -INT32_C( 5997969) }, + { -INT32_C( 755191), INT32_C( 8695866) }, + { INT64_C( 12848320639417), -INT64_C(104314997773578) } }, + { { -INT64_C( 459781408), INT64_C( 49282602) }, + { INT32_C( 3947837), -INT32_C( 8389725) }, + { -INT32_C( 5582373), -INT32_C( 8467379) }, + { -INT64_C( 44077057135810), INT64_C(142078011844152) } }, + { { -INT64_C( 798562048), INT64_C( 6909560) }, + { INT32_C( 5205898), INT32_C( 5769494) }, + { INT32_C( 2380672), INT32_C( 9432366) }, + { INT64_C( 24786272644864), INT64_C(108839964995168) } }, + { { -INT64_C( 972557647), -INT64_C( 839281959) }, + { INT32_C( 545324), -INT32_C( 3103272) }, + { -INT32_C( 8512777), -INT32_C( 2636705) }, + { -INT64_C( 9285415767143), INT64_C( 16363986315561) } }, + { { -INT64_C( 928911634), INT64_C( 842482216) }, + { -INT32_C( 5349220), INT32_C( 7281630) }, + { INT32_C( 3418587), INT32_C( 9455476) }, + { -INT64_C( 36574476815914), INT64_C(137703397893976) } }, + { { -INT64_C( 647837823), -INT64_C( 637410506) }, + { -INT32_C( 2946260), -INT32_C( 534877) }, + { -INT32_C( 7636201), -INT32_C( 6715356) }, + { INT64_C( 44995819278697), INT64_C( 7183141531918) } }, + { { -INT64_C( 841474483), INT64_C( 164819968) }, + { INT32_C( 2974160), -INT32_C( 9291319) }, + { INT32_C( 2611225), INT32_C( 3255629) }, + 
{ INT64_C( 15531560417517), -INT64_C( 60498010349334) } }, + { { INT64_C( 641627970), INT64_C( 735920203) }, + { -INT32_C( 8397045), INT32_C( 6142639) }, + { -INT32_C( 6977990), -INT32_C( 5600341) }, + { INT64_C(117189633707070), -INT64_C( 68801010159595) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); + simde_int64x2_t r = simde_vqdmlal_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlalh_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlals_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlal_high.c b/test/arm/neon/qdmlal_high.c new file mode 100644 index 000000000..c928e05c6 --- /dev/null +++ b/test/arm/neon/qdmlal_high.c @@ -0,0 +1,136 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlal_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlal_high.h" + +static int +test_simde_vqdmlal_high_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t c[8]; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 4008683), -INT32_C( 5028178), -INT32_C( 8962997), INT32_C( 2551757) }, + { INT16_C( 5202), -INT16_C( 4550), -INT16_C( 5993), -INT16_C( 4270), + -INT16_C( 1887), INT16_C( 3575), INT16_C( 5768), -INT16_C( 307) }, + { INT16_C( 6131), -INT16_C( 5050), -INT16_C( 190), -INT16_C( 7932), + INT16_C( 8515), INT16_C( 8459), -INT16_C( 6993), -INT16_C( 9367) }, + { -INT32_C( 36144293), INT32_C( 55453672), -INT32_C( 89634245), INT32_C( 8303095) } }, + { { -INT32_C( 2170289), -INT32_C( 281565), INT32_C( 3343964), -INT32_C( 2785758) }, + { INT16_C( 4163), INT16_C( 8019), INT16_C( 3543), INT16_C( 
72), + INT16_C( 4500), -INT16_C( 4727), INT16_C( 1520), -INT16_C( 9563) }, + { INT16_C( 8885), -INT16_C( 6877), -INT16_C( 685), -INT16_C( 5459), + INT16_C( 5251), INT16_C( 2695), INT16_C( 5168), -INT16_C( 338) }, + { INT32_C( 45088711), -INT32_C( 25760095), INT32_C( 19054684), INT32_C( 3678830) } }, + { { -INT32_C( 4553302), INT32_C( 3125108), -INT32_C( 2065657), -INT32_C( 8992153) }, + { INT16_C( 3431), -INT16_C( 7556), -INT16_C( 6557), -INT16_C( 6673), + -INT16_C( 5314), INT16_C( 5325), INT16_C( 6866), -INT16_C( 2847) }, + { -INT16_C( 3580), INT16_C( 1547), -INT16_C( 6342), INT16_C( 7251), + INT16_C( 2064), INT16_C( 9316), -INT16_C( 6342), -INT16_C( 6417) }, + { -INT32_C( 26489494), INT32_C( 102340508), -INT32_C( 89154001), INT32_C( 27546245) } }, + { { -INT32_C( 484890), -INT32_C( 2201981), INT32_C( 3679408), INT32_C( 1853540) }, + { INT16_C( 3856), INT16_C( 7814), -INT16_C( 1769), INT16_C( 1436), + INT16_C( 3444), -INT16_C( 9841), -INT16_C( 3411), INT16_C( 8367) }, + { INT16_C( 1142), INT16_C( 2589), INT16_C( 1678), INT16_C( 1142), + -INT16_C( 5847), INT16_C( 6564), INT16_C( 7556), INT16_C( 3693) }, + { -INT32_C( 40759026), -INT32_C( 131394629), -INT32_C( 47867624), INT32_C( 63652202) } }, + { { INT32_C( 2121238), INT32_C( 9877268), -INT32_C( 1143534), -INT32_C( 9498808) }, + { INT16_C( 5303), -INT16_C( 1950), -INT16_C( 9312), INT16_C( 5725), + -INT16_C( 6481), -INT16_C( 780), INT16_C( 5191), -INT16_C( 563) }, + { INT16_C( 8677), -INT16_C( 6810), -INT16_C( 8755), -INT16_C( 8531), + INT16_C( 820), INT16_C( 2131), INT16_C( 5933), INT16_C( 2473) }, + { -INT32_C( 8507602), INT32_C( 6552908), INT32_C( 60452872), -INT32_C( 12283406) } }, + { { -INT32_C( 3147561), INT32_C( 5096455), -INT32_C( 7864011), -INT32_C( 8639771) }, + { -INT16_C( 6393), -INT16_C( 6974), INT16_C( 1116), -INT16_C( 9693), + INT16_C( 9938), -INT16_C( 8485), INT16_C( 9800), INT16_C( 2177) }, + { INT16_C( 369), INT16_C( 3864), -INT16_C( 1403), INT16_C( 8739), + INT16_C( 9358), INT16_C( 4743), 
-INT16_C( 9043), INT16_C( 9839) }, + { INT32_C( 182852047), -INT32_C( 75392255), -INT32_C( 185106811), INT32_C( 34199235) } }, + { { INT32_C( 2379833), INT32_C( 4584653), -INT32_C( 2621237), INT32_C( 775961) }, + { INT16_C( 5859), INT16_C( 3035), -INT16_C( 5242), -INT16_C( 5764), + INT16_C( 6220), INT16_C( 7190), -INT16_C( 6485), -INT16_C( 4729) }, + { INT16_C( 3318), -INT16_C( 2580), -INT16_C( 7939), INT16_C( 6236), + INT16_C( 1562), -INT16_C( 4136), INT16_C( 2163), -INT16_C( 4317) }, + { INT32_C( 21811113), -INT32_C( 54891027), -INT32_C( 30675347), INT32_C( 41606147) } }, + { { INT32_C( 6237148), -INT32_C( 9768512), -INT32_C( 6859928), INT32_C( 5368290) }, + { -INT16_C( 9903), -INT16_C( 7336), INT16_C( 1785), INT16_C( 5751), + -INT16_C( 7034), INT16_C( 5612), INT16_C( 5698), -INT16_C( 5535) }, + { -INT16_C( 9269), -INT16_C( 5310), INT16_C( 5746), INT16_C( 4013), + INT16_C( 5760), INT16_C( 4110), INT16_C( 8914), -INT16_C( 764) }, + { -INT32_C( 74794532), INT32_C( 36362128), INT32_C( 94724016), INT32_C( 13825770) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); + simde_int32x4_t r = simde_vqdmlal_high_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_high_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t c[4]; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 795560267), INT64_C( 507739276) }, + { -INT32_C( 988690), INT32_C( 908701), -INT32_C( 284474), INT32_C( 671690) }, + { INT32_C( 117297), INT32_C( 669124), -INT32_C( 739630), -INT32_C( 612370) }, + { INT64_C( 421606569507), -INT64_C( 822137871324) } }, + { { -INT64_C( 199701311), -INT64_C( 239264910) }, + { INT32_C( 660536), INT32_C( 137005), -INT32_C( 440186), -INT32_C( 
152398) }, + { -INT32_C( 263430), INT32_C( 859419), INT32_C( 961341), INT32_C( 663492) }, + { -INT64_C( 846537400163), -INT64_C( 202468972542) } }, + { { INT64_C( 564059206), -INT64_C( 412362369) }, + { -INT32_C( 654124), INT32_C( 670018), INT32_C( 286862), -INT32_C( 574625) }, + { INT32_C( 7787), INT32_C( 752510), -INT32_C( 240056), INT32_C( 577510) }, + { -INT64_C( 137161829338), -INT64_C( 664115729869) } }, + { { -INT64_C( 167642734), INT64_C( 893273420) }, + { -INT32_C( 169982), -INT32_C( 374407), -INT32_C( 45505), -INT32_C( 362726) }, + { INT32_C( 241070), INT32_C( 122131), INT32_C( 491343), INT32_C( 110512) }, + { -INT64_C( 44884769164), -INT64_C( 79277878004) } }, + { { INT64_C( 227912254), INT64_C( 7129424) }, + { -INT32_C( 292729), INT32_C( 461415), INT32_C( 188811), INT32_C( 891933) }, + { -INT32_C( 15187), INT32_C( 143937), -INT32_C( 197341), INT32_C( 607886) }, + { -INT64_C( 74292390848), INT64_C( 1084394296700) } }, + { { -INT64_C( 237091799), -INT64_C( 400804847) }, + { -INT32_C( 918731), -INT32_C( 765967), INT32_C( 862619), INT32_C( 190016) }, + { INT32_C( 123092), -INT32_C( 647395), INT32_C( 880966), INT32_C( 285303) }, + { INT64_C( 1519638928109), INT64_C( 108023464849) } }, + { { -INT64_C( 64349374), -INT64_C( 521177021) }, + { -INT32_C( 901925), -INT32_C( 429371), INT32_C( 953612), -INT32_C( 755979) }, + { -INT32_C( 408089), -INT32_C( 658015), -INT32_C( 94549), INT32_C( 666792) }, + { -INT64_C( 180390471350), -INT64_C( 1008682675757) } }, + { { INT64_C( 185193827), -INT64_C( 1569354) }, + { -INT32_C( 759050), -INT32_C( 437291), INT32_C( 207575), -INT32_C( 177006) }, + { -INT32_C( 262650), INT32_C( 912777), INT32_C( 556302), -INT32_C( 41245) }, + { INT64_C( 231133969127), INT64_C( 14599655586) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); + 
simde_int64x2_t r = simde_vqdmlal_high_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlal_high_lane.c b/test/arm/neon/qdmlal_high_lane.c new file mode 100644 index 000000000..76d93bb86 --- /dev/null +++ b/test/arm/neon/qdmlal_high_lane.c @@ -0,0 +1,295 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlal_high_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlal_high_lane.h" + +static int +test_simde_vqdmlal_high_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 894353), -INT32_C( 114589), INT32_C( 405183), INT32_C( 768578) }, + { INT16_C( 6491), INT16_C( 3302), -INT16_C( 9424), -INT16_C( 2705), + INT16_C( 7001), INT16_C( 6386), -INT16_C( 9415), INT16_C( 8455) }, + { INT16_C( 4126), INT16_C( 7156), -INT16_C( 4825), -INT16_C( 2552) }, + INT8_C( 3), + { -INT32_C( 34838751), -INT32_C( 32708733), INT32_C( 48459343), -INT32_C( 42385742) } }, + { { INT32_C( 192194), INT32_C( 944576), INT32_C( 519442), -INT32_C( 947446) }, + { INT16_C( 9492), INT16_C( 1554), -INT16_C( 7751), -INT16_C( 5715), + -INT16_C( 4537), INT16_C( 558), -INT16_C( 5640), INT16_C( 3903) }, + { INT16_C( 2026), INT16_C( 2254), INT16_C( 8886), -INT16_C( 7598) }, + INT8_C( 3), + { INT32_C( 69136446), -INT32_C( 7534792), INT32_C( 86224882), -INT32_C( 60257434) } }, + { { INT32_C( 470671), -INT32_C( 18986), -INT32_C( 848425), INT32_C( 579963) }, + { -INT16_C( 1439), INT16_C( 6952), -INT16_C( 4417), -INT16_C( 1989), + INT16_C( 3445), INT16_C( 8250), -INT16_C( 1870), INT16_C( 3800) }, + { INT16_C( 9729), -INT16_C( 7764), -INT16_C( 7560), INT16_C( 950) }, + INT8_C( 3), + { INT32_C( 7016171), INT32_C( 
15656014), -INT32_C( 4401425), INT32_C( 7799963) } }, + { { INT32_C( 294955), INT32_C( 650666), INT32_C( 881243), INT32_C( 927961) }, + { -INT16_C( 8925), INT16_C( 2701), INT16_C( 1635), -INT16_C( 9090), + -INT16_C( 2483), -INT16_C( 5072), -INT16_C( 3082), -INT16_C( 9610) }, + { -INT16_C( 6687), -INT16_C( 2609), INT16_C( 8823), INT16_C( 3170) }, + INT8_C( 0), + { INT32_C( 33502597), INT32_C( 68483594), INT32_C( 42099911), INT32_C(129452101) } }, + { { INT32_C( 335787), INT32_C( 53789), INT32_C( 281996), INT32_C( 164669) }, + { INT16_C( 9117), INT16_C( 2965), INT16_C( 5932), -INT16_C( 3598), + -INT16_C( 1940), INT16_C( 8189), -INT16_C( 7301), INT16_C( 9863) }, + { INT16_C( 4314), INT16_C( 1423), -INT16_C( 5807), -INT16_C( 9427) }, + INT8_C( 3), + { INT32_C( 36912547), -INT32_C(154341617), INT32_C(137935050), -INT32_C(185792333) } }, + { { INT32_C( 858893), INT32_C( 643324), INT32_C( 115742), -INT32_C( 44124) }, + { -INT16_C( 8990), INT16_C( 9119), INT16_C( 3798), -INT16_C( 6834), + INT16_C( 1343), -INT16_C( 1875), -INT16_C( 778), INT16_C( 7639) }, + { INT16_C( 5027), INT16_C( 2079), INT16_C( 2810), INT16_C( 1112) }, + INT8_C( 0), + { INT32_C( 14361415), -INT32_C( 18207926), -INT32_C( 7706270), INT32_C( 76758382) } }, + { { -INT32_C( 191455), INT32_C( 878296), INT32_C( 795255), -INT32_C( 663269) }, + { INT16_C( 4146), -INT16_C( 589), -INT16_C( 7951), INT16_C( 6759), + INT16_C( 3630), -INT16_C( 5969), -INT16_C( 2209), INT16_C( 5306) }, + { -INT16_C( 3507), INT16_C( 380), INT16_C( 119), -INT16_C( 7800) }, + INT8_C( 3), + { -INT32_C( 56819455), INT32_C( 93994696), INT32_C( 35255655), -INT32_C( 83436869) } }, + { { INT32_C( 643659), -INT32_C( 678171), -INT32_C( 638593), INT32_C( 431494) }, + { INT16_C( 8065), -INT16_C( 1701), -INT16_C( 9074), INT16_C( 4292), + INT16_C( 7650), INT16_C( 2543), INT16_C( 2984), INT16_C( 3881) }, + { INT16_C( 8706), INT16_C( 241), -INT16_C( 8993), -INT16_C( 4041) }, + INT8_C( 2), + { -INT32_C(136949241), -INT32_C( 46416569), -INT32_C( 
54308817), -INT32_C( 69372172) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_4_(simde_vqdmlal_high_lane_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_high_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 127447), INT32_C( 822978), -INT32_C( 697650), -INT32_C( 247883) }, + { -INT16_C( 2100), -INT16_C( 3430), -INT16_C( 6622), INT16_C( 1972), + -INT16_C( 428), INT16_C( 871), -INT16_C( 3600), INT16_C( 4013) }, + { INT16_C( 5528), -INT16_C( 610), -INT16_C( 1150), -INT16_C( 8205), + INT16_C( 9857), -INT16_C( 697), -INT16_C( 2319), INT16_C( 6430) }, + INT8_C( 3), + { INT32_C( 7150927), -INT32_C( 13470132), INT32_C( 58378350), -INT32_C( 66101213) } }, + { { INT32_C( 455747), INT32_C( 701429), -INT32_C( 904921), -INT32_C( 464534) }, + { -INT16_C( 7141), -INT16_C( 6045), INT16_C( 1342), INT16_C( 6390), + INT16_C( 4621), -INT16_C( 5579), -INT16_C( 7186), INT16_C( 9665) }, + { INT16_C( 1260), INT16_C( 4914), -INT16_C( 5142), INT16_C( 4801), + -INT16_C( 2144), -INT16_C( 1681), -INT16_C( 1211), INT16_C( 8297) }, + INT8_C( 2), + { -INT32_C( 47066617), INT32_C( 58075865), INT32_C( 72995903), -INT32_C( 99859394) } }, + { { INT32_C( 638305), -INT32_C( 186852), -INT32_C( 213973), -INT32_C( 931458) }, + { INT16_C( 7760), INT16_C( 4425), INT16_C( 8019), -INT16_C( 8933), + INT16_C( 7668), INT16_C( 839), -INT16_C( 3102), INT16_C( 9527) }, + { -INT16_C( 6989), INT16_C( 9603), INT16_C( 2293), -INT16_C( 1576), + INT16_C( 22), -INT16_C( 7541), INT16_C( 861), INT16_C( 5508) }, + INT8_C( 
2), + { INT32_C( 35803753), INT32_C( 3660802), -INT32_C( 14439745), INT32_C( 42759364) } }, + { { INT32_C( 467159), INT32_C( 476002), INT32_C( 413763), -INT32_C( 274557) }, + { -INT16_C( 9758), INT16_C( 2410), -INT16_C( 1361), INT16_C( 2827), + -INT16_C( 1655), INT16_C( 6770), -INT16_C( 95), INT16_C( 3127) }, + { INT16_C( 2109), -INT16_C( 2712), -INT16_C( 7011), -INT16_C( 7930), + -INT16_C( 5565), INT16_C( 8254), -INT16_C( 2066), INT16_C( 217) }, + INT8_C( 1), + { INT32_C( 9443879), -INT32_C( 36244478), INT32_C( 929043), -INT32_C( 17235405) } }, + { { -INT32_C( 915165), -INT32_C( 571143), -INT32_C( 687588), -INT32_C( 595102) }, + { -INT16_C( 3396), -INT16_C( 330), INT16_C( 7337), -INT16_C( 7912), + INT16_C( 9264), -INT16_C( 6773), -INT16_C( 8665), INT16_C( 3851) }, + { -INT16_C( 2715), INT16_C( 3092), -INT16_C( 4692), INT16_C( 2741), + INT16_C( 6259), INT16_C( 5204), INT16_C( 7522), -INT16_C( 7232) }, + INT8_C( 3), + { INT32_C( 49870083), -INT32_C( 37700729), -INT32_C( 48189118), INT32_C( 20516080) } }, + { { INT32_C( 627584), -INT32_C( 43401), INT32_C( 754581), INT32_C( 870436) }, + { INT16_C( 2524), INT16_C( 7564), INT16_C( 5391), -INT16_C( 4454), + INT16_C( 2191), INT16_C( 4050), -INT16_C( 3970), -INT16_C( 4712) }, + { -INT16_C( 1285), -INT16_C( 2198), -INT16_C( 3116), -INT16_C( 1852), + INT16_C( 7505), -INT16_C( 5812), -INT16_C( 2990), -INT16_C( 5934) }, + INT8_C( 3), + { -INT32_C( 7487880), -INT32_C( 15044601), INT32_C( 15459461), INT32_C( 18323684) } }, + { { -INT32_C( 54830), -INT32_C( 203132), -INT32_C( 595975), -INT32_C( 337050) }, + { -INT16_C( 1470), INT16_C( 7910), -INT16_C( 8333), -INT16_C( 3329), + INT16_C( 6685), -INT16_C( 7194), INT16_C( 5248), -INT16_C( 7801) }, + { INT16_C( 9530), INT16_C( 4964), INT16_C( 3207), INT16_C( 8287), + INT16_C( 716), -INT16_C( 2058), -INT16_C( 6439), -INT16_C( 5117) }, + INT8_C( 2), + { INT32_C( 42822760), -INT32_C( 46345448), INT32_C( 33064697), -INT32_C( 50372664) } }, + { { INT32_C( 35118), -INT32_C( 320708), 
-INT32_C( 51072), INT32_C( 492529) }, + { INT16_C( 757), INT16_C( 6726), -INT16_C( 6022), -INT16_C( 8158), + INT16_C( 5620), INT16_C( 1231), -INT16_C( 260), -INT16_C( 8318) }, + { INT16_C( 156), INT16_C( 8068), -INT16_C( 7568), INT16_C( 9602), + -INT16_C( 6431), INT16_C( 9768), -INT16_C( 6471), INT16_C( 2563) }, + INT8_C( 0), + { INT32_C( 1788558), INT32_C( 63364), -INT32_C( 132192), -INT32_C( 2102687) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_8_(simde_vqdmlal_high_laneq_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 13898671), -INT64_C( 28702086) }, + { -INT32_C( 263510), -INT32_C( 347579), INT32_C( 942651), -INT32_C( 19214) }, + { -INT32_C( 985912), INT32_C( 863503), -INT32_C( 864486), -INT32_C( 67563) }, + INT8_C( 3), + { -INT64_C( 127362760355), INT64_C( 2567608878) } }, + { { -INT64_C( 49550599), INT64_C( 12924804) }, + { INT32_C( 569339), INT32_C( 916303), INT32_C( 322651), -INT32_C( 403309) }, + { INT32_C( 68174), -INT32_C( 513605), INT32_C( 1763), -INT32_C( 583573) }, + INT8_C( 1), + { -INT64_C( 331479884309), INT64_C( 414295962694) } }, + { { INT64_C( 33224957), -INT64_C( 60263322) }, + { -INT32_C( 733914), INT32_C( 345428), -INT32_C( 188026), -INT32_C( 361972) }, + { -INT32_C( 436531), INT32_C( 676538), INT32_C( 34787), -INT32_C( 755451) }, + INT8_C( 2), + { -INT64_C( 13048495967), -INT64_C( 25244103250) } }, + { { -INT64_C( 52374386), -INT64_C( 63935054) }, + { INT32_C( 407419), -INT32_C( 
463334), -INT32_C( 37967), INT32_C( 535562) }, + { -INT32_C( 286385), -INT32_C( 865597), INT32_C( 573606), INT32_C( 589682) }, + INT8_C( 0), + { INT64_C( 21693984204), -INT64_C( 306817781794) } }, + { { -INT64_C( 6598636), INT64_C( 73031330) }, + { INT32_C( 493558), INT32_C( 443848), -INT32_C( 419461), INT32_C( 376534) }, + { -INT32_C( 860362), -INT32_C( 176951), INT32_C( 253114), INT32_C( 41359) }, + INT8_C( 1), + { INT64_C( 148441488186), -INT64_C( 133183104338) } }, + { { -INT64_C( 71693657), INT64_C( 31801833) }, + { -INT32_C( 640025), INT32_C( 582287), INT32_C( 257565), INT32_C( 667728) }, + { INT32_C( 984425), INT32_C( 972052), INT32_C( 466460), INT32_C( 325387) }, + INT8_C( 0), + { INT64_C( 507035156593), INT64_C(1314688074633) } }, + { { INT64_C( 80798879), -INT64_C( 5132023) }, + { INT32_C( 40841), INT32_C( 230578), -INT32_C( 63996), INT32_C( 386471) }, + { -INT32_C( 872708), -INT32_C( 69206), INT32_C( 475254), -INT32_C( 518991) }, + INT8_C( 3), + { INT64_C( 66507494951), -INT64_C( 401155073545) } }, + { { -INT64_C( 7962780), INT64_C( 26698152) }, + { -INT32_C( 245699), -INT32_C( 779577), INT32_C( 614398), INT32_C( 990465) }, + { -INT32_C( 682597), INT32_C( 354148), INT32_C( 119693), -INT32_C( 196681) }, + INT8_C( 0), + { -INT64_C( 838780425992), -INT64_C(1352150177058) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_4_(simde_vqdmlal_high_laneq_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_high_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 
22649710), INT64_C( 73149530) }, + { -INT32_C( 674621), -INT32_C( 663040), INT32_C( 244552), INT32_C( 207830) }, + { INT32_C( 291873), INT32_C( 695008) }, + INT8_C( 1), + { INT64_C( 339908543122), INT64_C( 288960174810) } }, + { { INT64_C( 74893760), INT64_C( 21422606) }, + { -INT32_C( 756129), -INT32_C( 910364), -INT32_C( 884819), INT32_C( 337637) }, + { -INT32_C( 926175), INT32_C( 65143) }, + INT8_C( 1), + { -INT64_C( 115204634474), INT64_C( 44010796788) } }, + { { -INT64_C( 87847761), -INT64_C( 71055001) }, + { INT32_C( 532811), -INT32_C( 865841), INT32_C( 608434), INT32_C( 302705) }, + { INT32_C( 67173), -INT32_C( 302540) }, + INT8_C( 0), + { INT64_C( 81652826403), INT64_C( 40596150929) } }, + { { -INT64_C( 41489469), INT64_C( 33328384) }, + { INT32_C( 266498), INT32_C( 888074), -INT32_C( 970120), INT32_C( 774275) }, + { INT32_C( 620043), INT32_C( 473602) }, + INT8_C( 1), + { -INT64_C( 918943033949), INT64_C( 733429705484) } }, + { { INT64_C( 96386778), INT64_C( 11393205) }, + { INT32_C( 679805), INT32_C( 6544), INT32_C( 994812), INT32_C( 231461) }, + { -INT32_C( 690699), -INT32_C( 511317) }, + INT8_C( 1), + { -INT64_C(1017232188030), -INT64_C( 236688495069) } }, + { { INT64_C( 40244631), -INT64_C( 1612051) }, + { -INT32_C( 415899), -INT32_C( 119442), -INT32_C( 220145), -INT32_C( 506440) }, + { INT32_C( 7233), INT32_C( 114175) }, + INT8_C( 0), + { -INT64_C( 3144372939), -INT64_C( 7327773091) } }, + { { -INT64_C( 57747523), -INT64_C( 44936371) }, + { INT32_C( 385308), INT32_C( 496935), -INT32_C( 919538), -INT32_C( 160369) }, + { INT32_C( 348290), INT32_C( 358396) }, + INT8_C( 1), + { -INT64_C( 659175229619), -INT64_C( 114996152619) } }, + { { INT64_C( 80405546), -INT64_C( 36417386) }, + { -INT32_C( 895739), -INT32_C( 737647), -INT32_C( 168388), -INT32_C( 183564) }, + { -INT32_C( 562898), -INT32_C( 405129) }, + INT8_C( 0), + { INT64_C( 189650942394), INT64_C( 206619199558) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + 
simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_2_(simde_vqdmlal_high_lane_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_lane_s32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_laneq_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlal_high_n.c b/test/arm/neon/qdmlal_high_n.c new file mode 100644 index 000000000..67cdaf411 --- /dev/null +++ b/test/arm/neon/qdmlal_high_n.c @@ -0,0 +1,127 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlal_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlal_high_n.h" + +static int +test_simde_vqdmlal_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t c; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 413046), INT32_C( 443321), INT32_C( 536360), -INT32_C( 57359) }, + { -INT16_C( 5373), INT16_C( 7648), -INT16_C( 2270), -INT16_C( 9644), + -INT16_C( 442), INT16_C( 7552), -INT16_C( 9649), -INT16_C( 8624) }, + INT16_C( 9006), + { -INT32_C( 8374350), INT32_C( 136469945), -INT32_C( 173261428), -INT32_C( 155392847) } }, + { { -INT32_C( 328100), INT32_C( 888774), -INT32_C( 586192), INT32_C( 106665) }, + { -INT16_C( 8058), -INT16_C( 1793), INT16_C( 5117), INT16_C( 3494), + INT16_C( 3150), -INT16_C( 1621), INT16_C( 2769), -INT16_C( 6834) }, + INT16_C( 1088), + { INT32_C( 6526300), -INT32_C( 2638522), INT32_C( 5439152), -INT32_C( 14764119) } }, + { { -INT32_C( 306051), INT32_C( 407057), INT32_C( 601575), -INT32_C( 219908) }, + { -INT16_C( 4202), INT16_C( 9749), -INT16_C( 3094), INT16_C( 8199), + 
-INT16_C( 9563), INT16_C( 6040), INT16_C( 6427), INT16_C( 9933) }, + INT16_C( 4283), + { -INT32_C( 82222709), INT32_C( 52145697), INT32_C( 55655257), INT32_C( 84866170) } }, + { { -INT32_C( 757353), INT32_C( 689083), -INT32_C( 350090), INT32_C( 926036) }, + { -INT16_C( 4396), -INT16_C( 5546), INT16_C( 44), -INT16_C( 8707), + INT16_C( 1167), -INT16_C( 6498), INT16_C( 9216), -INT16_C( 7953) }, + -INT16_C( 4177), + { -INT32_C( 10506471), INT32_C( 54973375), -INT32_C( 77340554), INT32_C( 67365398) } }, + { { INT32_C( 550132), -INT32_C( 561004), INT32_C( 619629), -INT32_C( 862339) }, + { INT16_C( 7220), INT16_C( 4704), -INT16_C( 6936), INT16_C( 6210), + INT16_C( 8306), -INT16_C( 274), INT16_C( 6211), -INT16_C( 9252) }, + INT16_C( 5964), + { INT32_C( 99624100), -INT32_C( 3829276), INT32_C( 74704437), -INT32_C( 111220195) } }, + { { -INT32_C( 190829), INT32_C( 83384), -INT32_C( 684177), -INT32_C( 304022) }, + { INT16_C( 3361), -INT16_C( 8991), INT16_C( 2958), -INT16_C( 5174), + INT16_C( 4223), INT16_C( 1411), INT16_C( 3399), -INT16_C( 3695) }, + -INT16_C( 9502), + { -INT32_C( 80444721), -INT32_C( 26731260), -INT32_C( 65278773), INT32_C( 69915758) } }, + { { -INT32_C( 574255), INT32_C( 987356), INT32_C( 971545), -INT32_C( 3766) }, + { -INT16_C( 2909), -INT16_C( 1186), INT16_C( 4126), INT16_C( 63), + INT16_C( 6070), INT16_C( 284), INT16_C( 8912), -INT16_C( 4007) }, + INT16_C( 3652), + { INT32_C( 43761025), INT32_C( 3061692), INT32_C( 66064793), -INT32_C( 29270894) } }, + { { -INT32_C( 85805), -INT32_C( 567351), INT32_C( 225206), -INT32_C( 716760) }, + { -INT16_C( 6426), -INT16_C( 6271), -INT16_C( 104), -INT16_C( 6015), + INT16_C( 485), INT16_C( 5900), -INT16_C( 5869), -INT16_C( 1878) }, + INT16_C( 7731), + { INT32_C( 7413265), INT32_C( 90658449), -INT32_C( 90521272), -INT32_C( 29754396) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = 
simde_vld1q_s16(test_vec[i].b); + int16_t c = test_vec[i].c; + simde_int32x4_t r = simde_vqdmlal_high_n_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t c; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 52569594), INT64_C( 46784526) }, + { -INT32_C( 718351), INT32_C( 135729), -INT32_C( 794610), INT32_C( 77651) }, + -INT32_C( 972135), + { INT64_C(1544988954294), -INT64_C( 150927725244) } }, + { { INT64_C( 11830892), INT64_C( 32236020) }, + { INT32_C( 429132), INT32_C( 506878), INT32_C( 167297), -INT32_C( 960619) }, + -INT32_C( 451038), + { -INT64_C( 150902777680), INT64_C( 866583581064) } }, + { { INT64_C( 57262611), -INT64_C( 10112385) }, + { INT32_C( 786561), INT32_C( 88630), INT32_C( 867798), INT32_C( 990063) }, + INT32_C( 318577), + { INT64_C( 552978229503), INT64_C( 630812488317) } }, + { { INT64_C( 59464933), -INT64_C( 99742040) }, + { -INT32_C( 544326), -INT32_C( 464399), -INT32_C( 522580), INT32_C( 887554) }, + INT32_C( 207064), + { -INT64_C( 216355545307), INT64_C( 367461220872) } }, + { { -INT64_C( 57110833), INT64_C( 51531791) }, + { INT32_C( 254945), INT32_C( 533216), -INT32_C( 261726), INT32_C( 879663) }, + INT32_C( 268600), + { -INT64_C( 140656318033), INT64_C( 472606495391) } }, + { { -INT64_C( 97431652), -INT64_C( 32423614) }, + { -INT32_C( 167998), INT32_C( 569412), -INT32_C( 552599), INT32_C( 643674) }, + INT32_C( 101122), + { -INT64_C( 111857263808), INT64_C( 130146780842) } }, + { { -INT64_C( 19553586), -INT64_C( 346370) }, + { -INT32_C( 583295), -INT32_C( 5387), INT32_C( 721742), -INT32_C( 978622) }, + INT32_C( 631850), + { INT64_C( 912045811814), -INT64_C(1236684967770) } }, + { { INT64_C( 32084642), -INT64_C( 49169174) }, + { INT32_C( 535706), INT32_C( 97715), -INT32_C( 835258), -INT32_C( 994676) }, + -INT32_C( 559353), + { INT64_C( 
934440220790), INT64_C(1112700840082) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + int32_t c = test_vec[i].c; + simde_int64x2_t r = simde_vqdmlal_high_n_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_high_n_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlal_lane.c b/test/arm/neon/qdmlal_lane.c new file mode 100644 index 000000000..5f12ff146 --- /dev/null +++ b/test/arm/neon/qdmlal_lane.c @@ -0,0 +1,593 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlal_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlal_lane.h" + +SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ + +static int +test_simde_vqdmlalh_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int16_t b[1]; + int16_t v[4]; + int8_t lane; + int32_t r[1]; + } test_vec[] = { + { { -INT32_C( 928597325) }, + { INT16_C( 491) }, + { -INT16_C( 1892), INT16_C( 7957), INT16_C( 5108), -INT16_C( 4917)}, + INT8_C( 1), + { -INT32_C( 920783551) } }, + { { INT32_C( 955503274) }, + { INT16_C( 5989) }, + { INT16_C( 2544), INT16_C( 6322), -INT16_C( 3642), INT16_C( 6506)}, + INT8_C( 3), + { INT32_C(1033432142) } }, + { { -INT32_C( 114948833) }, + { -INT16_C( 1212) }, + { -INT16_C( 2485), INT16_C( 6915), INT16_C( 4401), -INT16_C( 4449)}, + INT8_C( 2), + { -INT32_C( 125616857) } }, + { { -INT32_C( 905358336) }, + { -INT16_C( 3455) }, + { INT16_C( 4546), INT16_C( 9051), -INT16_C( 2684), -INT16_C( 1201)}, + INT8_C( 0), + { -INT32_C( 936771196) } }, + { { -INT32_C( 126619894) }, + { -INT16_C( 5411) }, + { INT16_C( 2205), INT16_C( 7625), -INT16_C( 8915), -INT16_C( 429)}, + INT8_C( 1), + { -INT32_C( 209137644) } }, + { { INT32_C( 
194201072) }, + { INT16_C( 1004) }, + { INT16_C( 9001), INT16_C( 4983), -INT16_C( 7323), INT16_C( 5349)}, + INT8_C( 1), + { INT32_C( 204206936) } }, + { { INT32_C( 708756366) }, + { -INT16_C( 9878) }, + { INT16_C( 4537), INT16_C( 4529), -INT16_C( 5439), -INT16_C( 1731)}, + INT8_C( 0), + { INT32_C( 619123394) } }, + { { -INT32_C( 380764463) }, + { -INT16_C( 1634) }, + { -INT16_C( 4899), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { -INT32_C( 364754531) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + int32_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlalh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlalh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + case 2: r = simde_vqdmlalh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 2); break; + case 3: r = simde_vqdmlalh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 3); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i32(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlals_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int32_t b[1]; + int32_t v[2]; + int8_t lane; + int64_t r[1]; + } test_vec[] = { + { { INT64_C( 44508685950) }, + { -INT32_C( 979248) }, + { INT32_C( 943996), INT32_C( 693027)}, + INT8_C( 1), + { -INT64_C(1312781921442) } }, + { { INT64_C( 76828192571) }, + { -INT32_C( 730800) }, + { INT32_C( 448180), -INT32_C( 687279)}, + INT8_C( 1), + { INT64_C(1081355178971) } }, + { { INT64_C( 15972296819) }, + { -INT32_C( 166598) }, + { INT32_C( 434007), -INT32_C( 332523)}, + INT8_C( 0), + { -INT64_C( 128637099553) } }, + { { -INT64_C( 29112908142) }, + { -INT32_C( 890217) }, + { -INT32_C( 871566), INT32_C( 183981)}, + INT8_C( 0), + { INT64_C(1522652831502) } }, + { { -INT64_C( 66884081543) }, + { -INT32_C( 358235) }, + { -INT32_C( 199183), INT32_C( 
166087)}, + INT8_C( 0), + { INT64_C( 75824562467) } }, + { { -INT64_C( 45375791253) }, + { INT32_C( 471327) }, + { INT32_C( 639915), INT32_C( 825301)}, + INT8_C( 1), + { INT64_C( 732597497601) } }, + { { INT64_C( 93826405589) }, + { -INT32_C( 99269) }, + { -INT32_C( 628747), -INT32_C( 507344)}, + INT8_C( 1), + { INT64_C( 194553468661) } }, + { { INT64_C( 17271492226) }, + { -INT32_C( 717291) }, + { INT32_C( 310553), -INT32_C( 927821)}, + INT8_C( 0), + { -INT64_C( 428242251620) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + int64_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlals_lane_s32(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlals_lane_s32(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i64(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlalh_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int16_t b[1]; + int16_t v[8]; + int8_t lane; + int32_t r[1]; + } test_vec[] = { + { { -INT32_C( 128876435) }, + { INT16_C( 8167) }, + { INT16_C( 4367), INT16_C( 5524), INT16_C( 3500), INT16_C( 7833), + -INT16_C( 1658), INT16_C( 5756), -INT16_C( 3725), -INT16_C( 7243)}, + INT8_C( 6), + { -INT32_C( 189720585) } }, + { { -INT32_C( 832887394) }, + { -INT16_C( 5992) }, + { INT16_C( 718), INT16_C( 1503), -INT16_C( 1635), -INT16_C( 9088), + INT16_C( 2951), INT16_C( 9114), -INT16_C( 2329), INT16_C( 5502)}, + INT8_C( 5), + { -INT32_C( 942109570) } }, + { { -INT32_C( 533589683) }, + { INT16_C( 8157) }, + { -INT16_C( 170), -INT16_C( 7579), -INT16_C( 5647), -INT16_C( 8798), + INT16_C( 9007), -INT16_C( 1908), INT16_C( 3466), INT16_C( 1187)}, + INT8_C( 2), + { -INT32_C( 625714841) } }, + { { -INT32_C( 312222225) }, + { -INT16_C( 7476) }, + { -INT16_C( 8422), -INT16_C( 5973), INT16_C( 9555), -INT16_C( 2689), + INT16_C( 9787), 
-INT16_C( 6417), -INT16_C( 2133), INT16_C( 6433)}, + INT8_C( 2), + { -INT32_C( 455088585) } }, + { { INT32_C( 317735354) }, + { INT16_C( 8102) }, + { INT16_C( 3790), -INT16_C( 8283), -INT16_C( 6754), -INT16_C( 8752), + -INT16_C( 9863), -INT16_C( 1371), INT16_C( 6147), INT16_C( 2844)}, + INT8_C( 4), + { INT32_C( 157915302) } }, + { { INT32_C( 141559338) }, + { INT16_C( 8282) }, + { INT16_C( 2436), INT16_C( 4026), INT16_C( 7400), INT16_C( 3893), + -INT16_C( 3891), -INT16_C( 8306), INT16_C( 1775), INT16_C( 6062)}, + INT8_C( 5), + { INT32_C( 3978754) } }, + { { -INT32_C( 417376500) }, + { INT16_C( 3995) }, + { INT16_C( 5383), INT16_C( 9262), -INT16_C( 5678), INT16_C( 3658), + INT16_C( 4019), -INT16_C( 9278), -INT16_C( 5962), INT16_C( 9759)}, + INT8_C( 3), + { -INT32_C( 388149080) } }, + { { -INT32_C( 753942094) }, + { INT16_C( 6863) }, + { -INT16_C( 2747), -INT16_C( 2461), -INT16_C( 4631), INT16_C( 2782), + INT16_C( 2898), -INT16_C( 6022), INT16_C( 9230), -INT16_C( 3066)}, + INT8_C( 0), + { -INT32_C( 791647416) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + int32_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + case 2: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 2); break; + case 3: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 3); break; + case 4: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 4); break; + case 5: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 5); break; + case 6: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 6); break; + case 7: r = simde_vqdmlalh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 7); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i32(r, 
test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlals_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int32_t b[1]; + int32_t v[4]; + int8_t lane; + int64_t r[1]; + } test_vec[] = { + { { -INT64_C( 559402566429) }, + { -INT32_C( 136817) }, + { INT32_C( 972146), INT32_C( 645928), INT32_C( 80253), -INT32_C( 53618)}, + INT8_C( 2), + { -INT64_C( 581362515831) } }, + { { INT64_C( 968324900099) }, + { -INT32_C( 504386) }, + { INT32_C( 754043), -INT32_C( 755209), -INT32_C( 264308), -INT32_C( 501739)}, + INT8_C( 3), + { INT64_C(1474465154607) } }, + { { -INT64_C( 912937153168) }, + { INT32_C( 562877) }, + { -INT32_C( 738047), -INT32_C( 379351), INT32_C( 248340), -INT32_C( 343121)}, + INT8_C( 0), + { -INT64_C(1743796515606) } }, + { { INT64_C( 330634123075) }, + { -INT32_C( 665378) }, + { -INT32_C( 343916), -INT32_C( 12921), -INT32_C( 389835), INT32_C( 753717)}, + INT8_C( 1), + { INT64_C( 347828821351) } }, + { { INT64_C( 97780480309) }, + { -INT32_C( 309734) }, + { INT32_C( 174702), -INT32_C( 633437), -INT32_C( 801887), INT32_C( 48250)}, + INT8_C( 3), + { INT64_C( 67891149309) } }, + { { -INT64_C( 960108357320) }, + { -INT32_C( 552347) }, + { INT32_C( 199505), INT32_C( 277114), -INT32_C( 377026), -INT32_C( 578807)}, + INT8_C( 2), + { -INT64_C( 543609997276) } }, + { { INT64_C( 261824341322) }, + { INT32_C( 531703) }, + { INT32_C( 902495), INT32_C( 897319), -INT32_C( 878787), -INT32_C( 407825)}, + INT8_C( 2), + { -INT64_C( 672683027200) } }, + { { INT64_C( 147822779320) }, + { -INT32_C( 289635) }, + { -INT32_C( 217936), -INT32_C( 918247), -INT32_C( 237518), -INT32_C( 18738)}, + INT8_C( 0), + { INT64_C( 274066566040) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + int64_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlals_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = 
simde_vqdmlals_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + case 2: r = simde_vqdmlals_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 2); break; + case 3: r = simde_vqdmlals_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 3); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i64(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlal_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 425772172), -INT32_C( 870450928), -INT32_C( 353638269), INT32_C( 869320330) }, + { INT16_C( 5757), INT16_C( 3583), -INT16_C( 7892), INT16_C( 5774) }, + { -INT16_C( 6124), -INT16_C( 2665), -INT16_C( 748), -INT16_C( 4559)}, + INT8_C( 0), + { -INT32_C( 496283908), -INT32_C( 914335512), -INT32_C( 256977053), INT32_C( 798600378) } }, + { { INT32_C( 264890360), -INT32_C( 132935996), -INT32_C( 765529797), -INT32_C( 209964978) }, + { -INT16_C( 6125), -INT16_C( 1669), -INT16_C( 9417), -INT16_C( 5662) }, + { -INT16_C( 4094), -INT16_C( 4945), INT16_C( 1153), -INT16_C( 1279)}, + INT8_C( 2), + { INT32_C( 250766110), -INT32_C( 136784710), -INT32_C( 787245399), -INT32_C( 223021550) } }, + { { -INT32_C( 687037777), INT32_C( 350547), -INT32_C( 917055825), INT32_C( 467930951) }, + { INT16_C( 8), -INT16_C( 7260), -INT16_C( 7320), INT16_C( 6398) }, + { INT16_C( 2317), INT16_C( 2456), INT16_C( 3713), -INT16_C( 8705)}, + INT8_C( 3), + { -INT32_C( 687177057), INT32_C( 126747147), -INT32_C( 789614625), INT32_C( 356541771) } }, + { { INT32_C( 831645449), -INT32_C( 458276988), -INT32_C( 533944055), -INT32_C( 897913350) }, + { INT16_C( 2766), INT16_C( 6819), INT16_C( 570), -INT16_C( 6948) }, + { INT16_C( 7445), INT16_C( 6907), INT16_C( 2870), INT16_C( 4578)}, + INT8_C( 2), + { INT32_C( 847522289), -INT32_C( 419135928), -INT32_C( 530672255), -INT32_C( 937794870) } }, + { { INT32_C( 215114780), INT32_C( 289582821), 
-INT32_C( 183295087), INT32_C( 400458546) }, + { -INT16_C( 4761), -INT16_C( 5175), -INT16_C( 7164), -INT16_C( 9471) }, + { INT16_C( 2309), -INT16_C( 9504), INT16_C( 2390), -INT16_C( 501)}, + INT8_C( 3), + { INT32_C( 219885302), INT32_C( 294768171), -INT32_C( 176116759), INT32_C( 409948488) } }, + { { -INT32_C( 228273077), -INT32_C( 573738564), INT32_C( 805346111), INT32_C( 451542412) }, + { INT16_C( 5915), -INT16_C( 5948), -INT16_C( 5277), -INT16_C( 1081) }, + { -INT16_C( 3952), -INT16_C( 8119), -INT16_C( 3455), -INT16_C( 3040)}, + INT8_C( 3), + { -INT32_C( 264236277), -INT32_C( 537574724), INT32_C( 837430271), INT32_C( 458114892) } }, + { { INT32_C( 814789159), -INT32_C( 815174192), INT32_C( 796268231), -INT32_C( 450444009) }, + { -INT16_C( 7389), INT16_C( 8929), INT16_C( 9568), INT16_C( 9997) }, + { -INT16_C( 8028), INT16_C( 586), -INT16_C( 5375), -INT16_C( 1471)}, + INT8_C( 1), + { INT32_C( 806129251), -INT32_C( 804709404), INT32_C( 807481927), -INT32_C( 438727525) } }, + { { -INT32_C( 765711601), INT32_C( 489237452), -INT32_C( 297322404), -INT32_C( 918947291) }, + { -INT16_C( 5434), INT16_C( 495), INT16_C( 8036), -INT16_C( 9574) }, + { -INT16_C( 7119), -INT16_C( 6426), INT16_C( 5367), -INT16_C( 6430)}, + INT8_C( 3), + { -INT32_C( 695830361), INT32_C( 482871752), -INT32_C( 400665364), -INT32_C( 795825651) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlal_lane_s16(a, b, v, 0); break; + case 1: r = simde_vqdmlal_lane_s16(a, b, v, 1); break; + case 2: r = simde_vqdmlal_lane_s16(a, b, v, 2); break; + case 3: r = simde_vqdmlal_lane_s16(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + + simde_test_arm_neon_assert_equal_i32x4(r, 
simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 589163751624), INT64_C( 429706216903) }, + { INT32_C( 397), -INT32_C( 957326) }, + { -INT32_C( 883062), -INT32_C( 410472)}, + INT8_C( 0), + { -INT64_C( 589864902852), INT64_C(2120462641327) } }, + { { -INT64_C( 656392608678), -INT64_C( 503973207746) }, + { -INT32_C( 848069), INT32_C( 771551) }, + { -INT32_C( 629006), INT32_C( 576960)}, + INT8_C( 1), + { -INT64_C(1634996389158), INT64_C( 386334922174) } }, + { { -INT64_C( 95263852864), INT64_C( 665780368583) }, + { INT32_C( 603158), INT32_C( 943104) }, + { INT32_C( 202514), INT32_C( 9210)}, + INT8_C( 0), + { INT64_C( 149032025560), INT64_C(1047763895495) } }, + { { INT64_C( 126598555336), INT64_C( 273800393016) }, + { INT32_C( 526071), -INT32_C( 456475) }, + { -INT32_C( 63146), -INT32_C( 989530)}, + INT8_C( 0), + { INT64_C( 60159996604), INT64_C( 331449533716) } }, + { { INT64_C( 524521550518), -INT64_C( 231467718925) }, + { INT32_C( 386981), INT32_C( 504150) }, + { -INT32_C( 444746), INT32_C( 421317)}, + INT8_C( 1), + { INT64_C( 850604898472), INT64_C( 193346212175) } }, + { { INT64_C( 761045675378), -INT64_C( 119396145499) }, + { -INT32_C( 718853), INT32_C( 835642) }, + { INT32_C( 976927), INT32_C( 57882)}, + INT8_C( 0), + { -INT64_C( 643488134084), INT64_C(1513326318769) } }, + { { -INT64_C( 342414590564), -INT64_C( 156288463020) }, + { -INT32_C( 618980), -INT32_C( 442132) }, + { INT32_C( 799522), -INT32_C( 56231)}, + INT8_C( 0), + { -INT64_C(1332190845684), -INT64_C( 863276984828) } }, + { { INT64_C( 464983707360), INT64_C( 656959310680) }, + { -INT32_C( 986179), -INT32_C( 571141) }, + { INT32_C( 961008), -INT32_C( 49136)}, + INT8_C( 1), + { INT64_C( 561897490048), INT64_C( 713086479032) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlal_lane_s32(a, b, v, 0); break; + case 1: r = simde_vqdmlal_lane_s32(a, b, v, 1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s64(0); break; + } + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 568155650), INT32_C( 543219523), INT32_C( 939396263), -INT32_C( 713646577) }, + { -INT16_C( 3287), INT16_C( 7702), -INT16_C( 304), -INT16_C( 7258) }, + { -INT16_C( 421), INT16_C( 9798), INT16_C( 7292), INT16_C( 3391), + -INT16_C( 4377), INT16_C( 6560), INT16_C( 6599), INT16_C( 6832)}, + INT8_C( 1), + { INT32_C( 503743598), INT32_C( 694147915), INT32_C( 933439079), -INT32_C( 855874345) } }, + { { -INT32_C( 851965500), INT32_C( 262689683), INT32_C( 937835367), -INT32_C( 937323231) }, + { -INT16_C( 9936), INT16_C( 9725), INT16_C( 5485), INT16_C( 1635) }, + { -INT16_C( 1806), -INT16_C( 1922), -INT16_C( 2032), INT16_C( 6913), + -INT16_C( 1575), -INT16_C( 3999), INT16_C( 5066), INT16_C( 755)}, + INT8_C( 2), + { -INT32_C( 811585596), INT32_C( 223167283), INT32_C( 915544327), -INT32_C( 943967871) } }, + { { -INT32_C( 476015894), INT32_C( 407431452), -INT32_C( 336240585), -INT32_C( 353356841) }, + { INT16_C( 8127), -INT16_C( 1357), INT16_C( 1858), INT16_C( 944) }, + { -INT16_C( 6845), INT16_C( 9162), INT16_C( 8682), INT16_C( 4334), + -INT16_C( 3185), -INT16_C( 9666), INT16_C( 8724), INT16_C( 7844)}, + INT8_C( 4), + { -INT32_C( 527784884), INT32_C( 416075542), -INT32_C( 348076045), -INT32_C( 359370121) } }, + { { -INT32_C( 485224282), INT32_C( 706288413), 
INT32_C( 346759391), -INT32_C( 394498051) }, + { INT16_C( 2755), -INT16_C( 2327), INT16_C( 3952), INT16_C( 2153) }, + { -INT16_C( 8066), INT16_C( 299), -INT16_C( 1666), INT16_C( 5037), + INT16_C( 626), -INT16_C( 6025), INT16_C( 8365), INT16_C( 6783)}, + INT8_C( 2), + { -INT32_C( 494403942), INT32_C( 714041977), INT32_C( 333591327), -INT32_C( 401671847) } }, + { { -INT32_C( 735474031), -INT32_C( 433366471), INT32_C( 857935216), -INT32_C( 171303334) }, + { -INT16_C( 656), INT16_C( 8961), INT16_C( 7026), -INT16_C( 8637) }, + { -INT16_C( 3899), INT16_C( 3332), INT16_C( 4545), INT16_C( 6918), + -INT16_C( 8654), INT16_C( 2094), -INT16_C( 8807), -INT16_C( 5700)}, + INT8_C( 6), + { -INT32_C( 723919247), -INT32_C( 591205525), INT32_C( 734179252), -INT32_C( 19171216) } }, + { { INT32_C( 857914823), INT32_C( 134100517), -INT32_C( 671734036), -INT32_C( 821625611) }, + { INT16_C( 2826), -INT16_C( 2892), -INT16_C( 3258), INT16_C( 2609) }, + { -INT16_C( 8207), INT16_C( 2510), INT16_C( 6448), -INT16_C( 4161), + -INT16_C( 2965), -INT16_C( 4560), -INT16_C( 9611), -INT16_C( 1543)}, + INT8_C( 5), + { INT32_C( 832141703), INT32_C( 160475557), -INT32_C( 642021076), -INT32_C( 845419691) } }, + { { INT32_C( 180325523), INT32_C( 618326408), -INT32_C( 602592055), -INT32_C( 884991036) }, + { INT16_C( 263), -INT16_C( 6743), INT16_C( 7649), -INT16_C( 9877) }, + { INT16_C( 4276), -INT16_C( 5506), -INT16_C( 6679), INT16_C( 7635), + INT16_C( 6643), -INT16_C( 419), INT16_C( 6279), -INT16_C( 5078)}, + INT8_C( 5), + { INT32_C( 180105129), INT32_C( 623977042), -INT32_C( 609001917), -INT32_C( 876714110) } }, + { { INT32_C( 994862348), -INT32_C( 973583462), -INT32_C( 498991883), INT32_C( 730031301) }, + { INT16_C( 6654), INT16_C( 3001), INT16_C( 265), -INT16_C( 4764) }, + { INT16_C( 5869), -INT16_C( 4638), INT16_C( 6797), INT16_C( 7920), + -INT16_C( 7423), INT16_C( 3231), -INT16_C( 6303), INT16_C( 9659)}, + INT8_C( 6), + { INT32_C( 910982024), -INT32_C(1011414068), -INT32_C( 502332473), INT32_C( 
790086285) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlal_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vqdmlal_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vqdmlal_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vqdmlal_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vqdmlal_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vqdmlal_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vqdmlal_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vqdmlal_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 86143499926), INT64_C( 10686042104) }, + { INT32_C( 597840), INT32_C( 362874) }, + { INT32_C( 15572), -INT32_C( 288916), -INT32_C( 180511), -INT32_C( 121773)}, + INT8_C( 0), + { INT64_C( 104762628886), INT64_C( 21987389960) } }, + { { INT64_C( 70923230602), INT64_C( 22590881023) }, + { INT32_C( 491373), -INT32_C( 871823) }, + { INT32_C( 48311), INT32_C( 196262), INT32_C( 730961), INT32_C( 909459)}, + INT8_C( 2), + { INT64_C( 789272229508), -INT64_C(1251946342783) } }, + { { INT64_C( 23973086943), -INT64_C( 44722868031) }, + { -INT32_C( 898633), -INT32_C( 887236) }, + { INT32_C( 659525), -INT32_C( 85043), -INT32_C( 38786), INT32_C( 50165)}, + INT8_C( 1), + { INT64_C( 176817979381), INT64_C( 106183554265) } }, + { { INT64_C( 18816198374), INT64_C( 4696687039) }, + { INT32_C( 338910), INT32_C( 598815) }, + { -INT32_C( 787473), INT32_C( 
671299), -INT32_C( 254311), INT32_C( 797400)}, + INT8_C( 3), + { INT64_C( 559309866374), INT64_C( 959686849039) } }, + { { INT64_C( 23088482966), -INT64_C( 37015946585) }, + { -INT32_C( 664356), INT32_C( 501139) }, + { -INT32_C( 892684), -INT32_C( 991933), INT32_C( 599568), INT32_C( 259352)}, + INT8_C( 2), + { -INT64_C( 773564713450), INT64_C( 563917869319) } }, + { { -INT64_C( 81827515540), INT64_C( 77367027948) }, + { -INT32_C( 160156), INT32_C( 198091) }, + { INT32_C( 635010), INT32_C( 938868), INT32_C( 465028), INT32_C( 201136)}, + INT8_C( 2), + { -INT64_C( 230781564276), INT64_C( 261602751044) } }, + { { INT64_C( 72614402642), INT64_C( 71956458345) }, + { INT32_C( 548307), INT32_C( 359931) }, + { INT32_C( 77396), INT32_C( 201469), -INT32_C( 468037), INT32_C( 993136)}, + INT8_C( 1), + { INT64_C( 293548128608), INT64_C( 216986335623) } }, + { { INT64_C( 54757095308), -INT64_C( 78647681491) }, + { -INT32_C( 391360), INT32_C( 956876) }, + { -INT32_C( 57677), -INT32_C( 975727), INT32_C( 913570), INT32_C( 949988)}, + INT8_C( 0), + { INT64_C( 99902036748), -INT64_C( 189027155595) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlal_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vqdmlal_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vqdmlal_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vqdmlal_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s64(0); break; + } + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlalh_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlals_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlalh_laneq_s16) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlals_laneq_s32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_laneq_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlal_n.c b/test/arm/neon/qdmlal_n.c new file mode 100644 index 000000000..24cada709 --- /dev/null +++ b/test/arm/neon/qdmlal_n.c @@ -0,0 +1,119 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlal_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlal_n.h" + +static int +test_simde_vqdmlal_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t c; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 705275087), -INT32_C( 127604877), INT32_C( 965818803), -INT32_C( 877396080) }, + { -INT16_C( 9923), INT16_C( 4614), -INT16_C( 6282), INT16_C( 3021) }, + -INT16_C( 9646), + { -INT32_C( 513840571), -INT32_C( 216618165), INT32_C(1087011147), -INT32_C( 935677212) } }, + { { -INT32_C( 199952352), INT32_C( 215320872), -INT32_C( 731192087), INT32_C( 378983641) }, + { -INT16_C( 2857), INT16_C( 5541), INT16_C( 1348), -INT16_C( 145) }, + -INT16_C( 5488), + { -INT32_C( 168593920), INT32_C( 154502856), -INT32_C( 745987735), INT32_C( 380575161) } }, + { { INT32_C( 345416712), -INT32_C( 190883663), INT32_C( 531028224), -INT32_C( 971088060) }, + { INT16_C( 6843), INT16_C( 5246), INT16_C( 3959), -INT16_C( 5365) }, + -INT16_C( 1224), + { INT32_C( 328665048), -INT32_C( 203725871), INT32_C( 521336592), -INT32_C( 957954540) } }, + { { INT32_C( 431788782), INT32_C( 304549383), INT32_C( 187075575), INT32_C( 225465734) }, + { -INT16_C( 1437), -INT16_C( 1082), INT16_C( 8813), -INT16_C( 1659) }, + INT16_C( 7091), + { INT32_C( 411409248), INT32_C( 289204459), INT32_C( 312061541), INT32_C( 201937796) } }, + { { -INT32_C( 842968502), -INT32_C( 122274056), -INT32_C( 112994603), INT32_C( 752609068) }, + { -INT16_C( 4912), -INT16_C( 
7400), -INT16_C( 1633), INT16_C( 1737) }, + -INT16_C( 6069), + { -INT32_C( 783346646), -INT32_C( 32452856), -INT32_C( 93173249), INT32_C( 731525362) } }, + { { INT32_C( 631739237), INT32_C( 980023855), -INT32_C( 761772772), INT32_C( 225384826) }, + { -INT16_C( 4239), INT16_C( 4739), INT16_C( 7055), -INT16_C( 1077) }, + INT16_C( 5831), + { INT32_C( 582304019), INT32_C(1035290073), -INT32_C( 679497362), INT32_C( 212824852) } }, + { { INT32_C( 650751202), INT32_C( 457968199), -INT32_C( 410210956), -INT32_C( 454399268) }, + { -INT16_C( 4801), -INT16_C( 2204), -INT16_C( 2220), INT16_C( 2537) }, + -INT16_C( 9442), + { INT32_C( 741413286), INT32_C( 499588535), -INT32_C( 368288476), -INT32_C( 502307976) } }, + { { INT32_C( 978331563), INT32_C( 479187906), -INT32_C( 939479917), -INT32_C( 326678923) }, + { -INT16_C( 7412), -INT16_C( 248), -INT16_C( 2162), INT16_C( 8922) }, + INT16_C( 3247), + { INT32_C( 930198035), INT32_C( 477577394), -INT32_C( 953519945), -INT32_C( 268739455) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + int16_t c = test_vec[i].c; + simde_int32x4_t r = simde_vqdmlal_n_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlal_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t c; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 376165768222), -INT64_C( 197324743023) }, + { INT32_C( 741465), INT32_C( 968010) }, + INT32_C( 66342), + { -INT64_C( 277785226162), -INT64_C( 68885304183) } }, + { { INT64_C( 251135060065), INT64_C( 497199393432) }, + { -INT32_C( 404762), -INT32_C( 230075) }, + INT32_C( 244631), + { INT64_C( 53100394421), INT64_C( 384632438782) } }, + { { -INT64_C( 988894543133), INT64_C( 682999452113) }, + { INT32_C( 260054), INT32_C( 105828) }, + INT32_C( 331400), 
+ { -INT64_C( 816530751933), INT64_C( 753142250513) } }, + { { INT64_C( 705468518678), -INT64_C( 195976812580) }, + { INT32_C( 106637), -INT32_C( 254619) }, + INT32_C( 660228), + { INT64_C( 846277985150), -INT64_C( 532189998844) } }, + { { INT64_C( 494136024340), -INT64_C( 606948135213) }, + { -INT32_C( 691910), -INT32_C( 551857) }, + INT32_C( 155068), + { INT64_C( 279549824580), -INT64_C( 778098857765) } }, + { { INT64_C( 154024385460), INT64_C( 607600790766) }, + { INT32_C( 674527), -INT32_C( 109728) }, + INT32_C( 6749), + { INT64_C( 163129150906), INT64_C( 606119682222) } }, + { { -INT64_C( 422189543539), -INT64_C( 471989869341) }, + { INT32_C( 260659), INT32_C( 621255) }, + -INT32_C( 195916), + { -INT64_C( 524324080827), -INT64_C( 715417458501) } }, + { { INT64_C( 13880703167), INT64_C( 146406296842) }, + { -INT32_C( 403731), -INT32_C( 674292) }, + INT32_C( 504404), + { -INT64_C( 393406359481), -INT64_C( 533824867094) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + int32_t c = test_vec[i].c; + simde_int64x2_t r = simde_vqdmlal_n_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlal_n_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlsl.c b/test/arm/neon/qdmlsl.c new file mode 100644 index 000000000..c2b9968e2 --- /dev/null +++ b/test/arm/neon/qdmlsl.c @@ -0,0 +1,224 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlsl + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlsl.h" + +static int +test_simde_vqdmlslh_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int16_t b[1]; + int16_t c[1]; + int32_t r[1]; + } test_vec[] = { + { { -INT32_C( 7304045) }, + { -INT16_C( 4745) }, + { 
INT16_C( 7757)}, + { INT32_C( 66309885) } }, + { { -INT32_C( 91150936) }, + { INT16_C( 275) }, + { -INT16_C( 1162)}, + { -INT32_C( 90511836) } }, + { { INT32_C( 9182566) }, + { -INT16_C( 3247) }, + { -INT16_C( 1614)}, + { -INT32_C( 1298750) } }, + { { INT32_C( 54973448) }, + { INT16_C( 9255) }, + { INT16_C( 5744)}, + { -INT32_C( 51347992) } }, + { { -INT32_C( 97477178) }, + { -INT16_C( 9570) }, + { INT16_C( 5135)}, + { INT32_C( 806722) } }, + { { -INT32_C( 54320777) }, + { INT16_C( 1869) }, + { -INT16_C( 3076)}, + { -INT32_C( 42822689) } }, + { { INT32_C( 27872303) }, + { -INT16_C( 8110) }, + { -INT16_C( 8328)}, + { -INT32_C( 107207857) } }, + { { INT32_C( 67057391) }, + { -INT16_C( 2309) }, + { INT16_C( 1079)}, + { INT32_C( 72040213) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t r = simde_vqdmlslh_s16(test_vec[i].a[0], test_vec[i].b[0], test_vec[i].c[0]); + + simde_assert_equal_i32(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlsls_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int32_t b[1]; + int32_t c[1]; + int64_t r[1]; + } test_vec[] = { + { { INT64_C( 30598753824) }, + { INT32_C( 472121) }, + { -INT32_C( 110072)}, + { INT64_C( 134533359248) } }, + { { -INT64_C( 879266448960) }, + { -INT32_C( 870787) }, + { INT32_C( 215247)}, + { -INT64_C( 504397870182) } }, + { { -INT64_C( 524706562706) }, + { -INT32_C( 267546) }, + { -INT32_C( 367178)}, + { -INT64_C( 721180573082) } }, + { { INT64_C( 141628624861) }, + { -INT32_C( 323091) }, + { -INT32_C( 964426)}, + { -INT64_C( 481566096671) } }, + { { INT64_C( 834712643290) }, + { INT32_C( 246066) }, + { INT32_C( 479461)}, + { INT64_C( 598754542438) } }, + { { -INT64_C( 688696419359) }, + { -INT32_C( 735157) }, + { -INT32_C( 39524)}, + { -INT64_C( 746809109895) } }, + { { INT64_C( 903973493156) }, + { -INT32_C( 527450) }, + { -INT32_C( 900523)}, + { -INT64_C( 45988219544) } }, + { { INT64_C( 978260666802) }, + { INT32_C( 
529515) }, + { -INT32_C( 590095)}, + { INT64_C( 1603188974652) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t r = simde_vqdmlsls_s32(test_vec[i].a[0], test_vec[i].b[0], test_vec[i].c[0]); + + simde_assert_equal_i64(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlsl_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t c[4]; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 43220716), -INT32_C( 19739561), INT32_C( 79041776), INT32_C( 57556654) }, + { INT16_C( 8044), -INT16_C( 4160), -INT16_C( 8934), INT16_C( 9184) }, + { -INT16_C( 6473), INT16_C( 5685), -INT16_C( 5752), INT16_C( 6515)}, + { INT32_C( 60916908), INT32_C( 27559639), -INT32_C( 23734960), -INT32_C( 62110866) } }, + { { -INT32_C( 17039563), INT32_C( 393668), INT32_C( 79830887), INT32_C( 97106023) }, + { -INT16_C( 2620), INT16_C( 2519), -INT16_C( 9767), INT16_C( 2499) }, + { INT16_C( 1205), INT16_C( 7592), INT16_C( 5902), INT16_C( 5791)}, + { -INT32_C( 10725363), -INT32_C( 37854828), INT32_C( 195120555), INT32_C( 68162605) } }, + { { -INT32_C( 23439805), INT32_C( 58026650), -INT32_C( 5180845), INT32_C( 30866311) }, + { -INT16_C( 8571), -INT16_C( 8928), INT16_C( 3895), -INT16_C( 9771) }, + { -INT16_C( 3876), INT16_C( 8898), -INT16_C( 9394), -INT16_C( 6781)}, + { -INT32_C( 89882197), INT32_C( 216909338), INT32_C( 67998415), -INT32_C( 101647991) } }, + { { INT32_C( 36239852), -INT32_C( 51532048), -INT32_C( 71094540), -INT32_C( 44844918) }, + { -INT16_C( 190), INT16_C( 446), -INT16_C( 752), INT16_C( 8001) }, + { -INT16_C( 6821), INT16_C( 4981), INT16_C( 5319), INT16_C( 9223)}, + { INT32_C( 33647872), -INT32_C( 55975100), -INT32_C( 63094764), -INT32_C( 192431364) } }, + { { -INT32_C( 3015366), INT32_C( 11701950), -INT32_C( 53181128), INT32_C( 35548712) }, + { -INT16_C( 2890), INT16_C( 7657), INT16_C( 8388), INT16_C( 4823) }, + { -INT16_C( 1859), INT16_C( 7752), -INT16_C( 5345), INT16_C( 
6927)}, + { -INT32_C( 13760386), -INT32_C( 107012178), INT32_C( 36486592), -INT32_C( 31269130) } }, + { { INT32_C( 29565525), -INT32_C( 44405017), INT32_C( 72557961), INT32_C( 76709539) }, + { INT16_C( 6757), INT16_C( 8764), INT16_C( 1978), -INT16_C( 4268) }, + { INT16_C( 2772), -INT16_C( 9818), INT16_C( 8865), INT16_C( 5166)}, + { -INT32_C( 7895283), INT32_C( 127684887), INT32_C( 37488021), INT32_C( 120806515) } }, + { { INT32_C( 89212585), -INT32_C( 71615372), -INT32_C( 12427788), -INT32_C( 40736248) }, + { -INT16_C( 1166), INT16_C( 6690), INT16_C( 7767), -INT16_C( 5984) }, + { -INT16_C( 9261), -INT16_C( 1037), INT16_C( 6708), INT16_C( 4340)}, + { INT32_C( 67615933), -INT32_C( 57740312), -INT32_C( 116629860), INT32_C( 11204872) } }, + { { INT32_C( 3969110), INT32_C( 8492563), INT32_C( 23842348), INT32_C( 36300877) }, + { INT16_C( 8057), -INT16_C( 9339), INT16_C( 1806), INT16_C( 8600) }, + { -INT16_C( 751), -INT16_C( 6991), INT16_C( 1494), -INT16_C( 6795)}, + { INT32_C( 16070724), -INT32_C( 122085335), INT32_C( 18446020), INT32_C( 153174877) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); + simde_int32x4_t r = simde_vqdmlsl_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t c[2]; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 826455899656), INT64_C( 945976857854) }, + { -INT32_C( 449214), INT32_C( 605955) }, + { -INT32_C( 482717), INT32_C( 781825)}, + { INT64_C( 392769430780), -INT64_C( 1524677896) } }, + { { INT64_C( 835289013120), INT64_C( 749191377169) }, + { -INT32_C( 173814), INT32_C( 482498) }, + { INT32_C( 251796), -INT32_C( 454500)}, + { INT64_C( 922820353008), 
INT64_C( 1187782059169) } }, + { { -INT64_C( 659348795652), -INT64_C( 298665949347) }, + { -INT32_C( 841301), INT32_C( 216093) }, + { INT32_C( 285802), INT32_C( 17051)}, + { -INT64_C( 178457778848), -INT64_C( 306035152833) } }, + { { INT64_C( 920729567534), INT64_C( 341740430374) }, + { INT32_C( 765549), INT32_C( 219031) }, + { INT32_C( 556311), -INT32_C( 538859)}, + { INT64_C( 68962908056), INT64_C( 577794081632) } }, + { { INT64_C( 680261439855), -INT64_C( 555740744485) }, + { -INT32_C( 735475), INT32_C( 633819) }, + { -INT32_C( 775268), -INT32_C( 352496)}, + { -INT64_C( 460119024745), -INT64_C( 108903420037) } }, + { { -INT64_C( 730657729147), INT64_C( 663399100769) }, + { -INT32_C( 769318), INT32_C( 827092) }, + { -INT32_C( 784599), INT32_C( 40219)}, + { -INT64_C( 1937869996111), INT64_C( 596869474473) } }, + { { INT64_C( 875398356494), INT64_C( 395367813293) }, + { INT32_C( 388462), INT32_C( 192181) }, + { -INT32_C( 442452), INT32_C( 110220)}, + { INT64_C( 1219149934142), INT64_C( 353003433653) } }, + { { INT64_C( 138304755295), -INT64_C( 639741888131) }, + { -INT32_C( 305245), -INT32_C( 548274) }, + { -INT32_C( 805474), INT32_C( 431866)}, + { -INT64_C( 353429066965), -INT64_C( 166180089563) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); + simde_int64x2_t r = simde_vqdmlsl_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlslh_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsls_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlsl_high.c b/test/arm/neon/qdmlsl_high.c new file mode 100644 index 000000000..a3a41545e --- 
/dev/null +++ b/test/arm/neon/qdmlsl_high.c @@ -0,0 +1,136 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlsl_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlsl_high.h" + +static int +test_simde_vqdmlsl_high_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t c[8]; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 747885390), INT32_C( 423110881), -INT32_C( 937077694), -INT32_C( 483181547) }, + { INT16_C( 3292), INT16_C( 8866), -INT16_C( 1582), INT16_C( 8444), + INT16_C( 223), INT16_C( 6504), INT16_C( 8252), -INT16_C( 5956) }, + { INT16_C( 733), -INT16_C( 7182), -INT16_C( 9403), -INT16_C( 2116), + -INT16_C( 6042), -INT16_C( 2530), -INT16_C( 7860), INT16_C( 1453)}, + { -INT32_C( 745190658), INT32_C( 456021121), -INT32_C( 807356254), -INT32_C( 465873411) } }, + { { INT32_C( 70925407), INT32_C( 626232695), -INT32_C( 252554284), INT32_C( 431103080) }, + { INT16_C( 7119), INT16_C( 5510), -INT16_C( 8082), INT16_C( 5127), + -INT16_C( 9975), INT16_C( 2746), INT16_C( 1268), INT16_C( 2762) }, + { INT16_C( 649), -INT16_C( 8763), -INT16_C( 4572), INT16_C( 6117), + INT16_C( 8432), INT16_C( 4649), INT16_C( 4030), -INT16_C( 2755)}, + { INT32_C( 239143807), INT32_C( 600700387), -INT32_C( 262774364), INT32_C( 446321700) } }, + { { -INT32_C( 536689326), -INT32_C( 139523788), -INT32_C( 125270464), INT32_C( 873332710) }, + { -INT16_C( 8122), -INT16_C( 7639), INT16_C( 8362), -INT16_C( 1175), + INT16_C( 447), -INT16_C( 8891), -INT16_C( 7641), -INT16_C( 1787) }, + { -INT16_C( 6426), INT16_C( 4090), -INT16_C( 5831), -INT16_C( 5382), + INT16_C( 1335), INT16_C( 3725), INT16_C( 6097), -INT16_C( 3746)}, + { -INT32_C( 537882816), -INT32_C( 73285838), -INT32_C( 32096110), INT32_C( 859944506) } }, + { { INT32_C( 901033376), -INT32_C( 849542112), INT32_C( 141308212), -INT32_C( 801097465) }, + { INT16_C( 8881), -INT16_C( 8697), INT16_C( 8608), -INT16_C( 8131), + INT16_C( 8520), -INT16_C( 663), -INT16_C( 1900), -INT16_C( 210) }, + { 
-INT16_C( 9769), INT16_C( 6187), -INT16_C( 6074), -INT16_C( 5166), + -INT16_C( 17), INT16_C( 9183), INT16_C( 43), INT16_C( 8961)}, + { INT32_C( 901323056), -INT32_C( 837365454), INT32_C( 141471612), -INT32_C( 797333845) } }, + { { -INT32_C( 744968491), INT32_C( 963308292), -INT32_C( 201343108), -INT32_C( 536196437) }, + { INT16_C( 9912), INT16_C( 6082), -INT16_C( 1482), INT16_C( 5248), + INT16_C( 8622), -INT16_C( 2872), -INT16_C( 5619), INT16_C( 3797) }, + { INT16_C( 3860), INT16_C( 9854), INT16_C( 1259), INT16_C( 2424), + INT16_C( 5041), -INT16_C( 9041), -INT16_C( 5325), INT16_C( 2802)}, + { -INT32_C( 831895495), INT32_C( 911376788), -INT32_C( 261185458), -INT32_C( 557474825) } }, + { { -INT32_C( 147095240), -INT32_C( 382346379), -INT32_C( 200297616), INT32_C( 248079928) }, + { INT16_C( 5753), INT16_C( 1550), INT16_C( 6057), INT16_C( 8103), + -INT16_C( 3757), -INT16_C( 588), -INT16_C( 6897), INT16_C( 2868) }, + { -INT16_C( 6337), -INT16_C( 2298), -INT16_C( 9346), -INT16_C( 1545), + -INT16_C( 1912), INT16_C( 4849), -INT16_C( 5850), -INT16_C( 4585)}, + { -INT32_C( 161462008), -INT32_C( 376643955), -INT32_C( 280992516), INT32_C( 274379488) } }, + { { INT32_C( 847470746), INT32_C( 739890236), INT32_C( 605341917), INT32_C( 394630673) }, + { INT16_C( 7092), -INT16_C( 221), -INT16_C( 1527), INT16_C( 652), + INT16_C( 2190), -INT16_C( 4343), -INT16_C( 16), INT16_C( 9776) }, + { -INT16_C( 7960), -INT16_C( 2053), -INT16_C( 3342), -INT16_C( 8317), + INT16_C( 9714), INT16_C( 6319), INT16_C( 2207), -INT16_C( 6524)}, + { INT32_C( 804923426), INT32_C( 794777070), INT32_C( 605412541), INT32_C( 522187921) } }, + { { -INT32_C( 351695750), -INT32_C( 251638833), -INT32_C( 901741340), -INT32_C( 143936069) }, + { INT16_C( 6687), INT16_C( 8399), INT16_C( 7245), -INT16_C( 4786), + -INT16_C( 1077), -INT16_C( 3605), -INT16_C( 9048), INT16_C( 5699) }, + { -INT16_C( 4202), -INT16_C( 2749), INT16_C( 5485), INT16_C( 6722), + INT16_C( 2934), INT16_C( 6380), INT16_C( 6638), -INT16_C( 8437)}, + { 
-INT32_C( 345375914), -INT32_C( 205639033), -INT32_C( 781620092), -INT32_C( 47771143) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); + simde_int32x4_t r = simde_vqdmlsl_high_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_high_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t c[4]; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 253531438173), -INT64_C( 977771259694) }, + { INT32_C( 986427), -INT32_C( 362413), -INT32_C( 849434), -INT32_C( 890362) }, + { -INT32_C( 70267), INT32_C( 699087), INT32_C( 816843), -INT32_C( 995287)}, + { INT64_C( 1641239871897), -INT64_C( 2750102707482) } }, + { { INT64_C( 477194517593), -INT64_C( 330738868250) }, + { -INT32_C( 402288), INT32_C( 441145), INT32_C( 243046), -INT32_C( 391873) }, + { -INT32_C( 738110), INT32_C( 85130), -INT32_C( 533651), INT32_C( 381814)}, + { INT64_C( 736597999485), -INT64_C( 31493673006) } }, + { { INT64_C( 681338659209), -INT64_C( 933208547020) }, + { -INT32_C( 718590), INT32_C( 65023), INT32_C( 152877), -INT32_C( 72901) }, + { -INT32_C( 9606), -INT32_C( 60987), -INT32_C( 717749), -INT32_C( 625166)}, + { INT64_C( 900793286955), -INT64_C( 1024359000152) } }, + { { INT64_C( 972110347760), INT64_C( 806873393212) }, + { -INT32_C( 388297), INT32_C( 455037), -INT32_C( 476932), INT32_C( 132689) }, + { -INT32_C( 870554), INT32_C( 287811), -INT32_C( 989866), -INT32_C( 996621)}, + { INT64_C( 27912805536), INT64_C( 1071354680950) } }, + { { INT64_C( 744227908851), INT64_C( 217341748577) }, + { INT32_C( 875192), -INT32_C( 451221), -INT32_C( 525998), -INT32_C( 403587) }, + { -INT32_C( 261659), INT32_C( 128275), -INT32_C( 618743), INT32_C( 205123)}, + { INT64_C( 93312747823), 
INT64_C( 382911700979) } }, + { { -INT64_C( 66370325823), -INT64_C( 749746646109) }, + { -INT32_C( 636526), INT32_C( 370816), -INT32_C( 800610), INT32_C( 351071) }, + { -INT32_C( 793821), INT32_C( 254667), INT32_C( 481345), INT32_C( 224270)}, + { INT64_C( 704368915077), -INT64_C( 907216032449) } }, + { { -INT64_C( 246022628638), -INT64_C( 485535234684) }, + { INT32_C( 417350), -INT32_C( 297899), INT32_C( 120115), -INT32_C( 523418) }, + { INT32_C( 402583), -INT32_C( 423284), INT32_C( 751713), -INT32_C( 207976)}, + { -INT64_C( 426606642628), -INT64_C( 703251998620) } }, + { { -INT64_C( 737734691398), INT64_C( 257791861690) }, + { -INT32_C( 3312), INT32_C( 750732), INT32_C( 965480), -INT32_C( 996117) }, + { INT32_C( 256052), INT32_C( 522416), INT32_C( 150123), -INT32_C( 381846)}, + { -INT64_C( 1027616199478), -INT64_C( 502934722274) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); + simde_int64x2_t r = simde_vqdmlsl_high_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlsl_high_lane.c b/test/arm/neon/qdmlsl_high_lane.c new file mode 100644 index 000000000..5ce543d31 --- /dev/null +++ b/test/arm/neon/qdmlsl_high_lane.c @@ -0,0 +1,295 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlsl_high_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlsl_high_lane.h" + +static int +test_simde_vqdmlsl_high_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 701766005), -INT32_C( 
532299217), -INT32_C( 692528001), -INT32_C( 718067011) }, + { INT16_C( 5655), -INT16_C( 7895), -INT16_C( 2651), INT16_C( 6636), + -INT16_C( 1285), -INT16_C( 2290), -INT16_C( 6574), -INT16_C( 3871) }, + { INT16_C( 7190), -INT16_C( 9951), -INT16_C( 4444), INT16_C( 9020)}, + INT8_C( 0), + { INT32_C( 720244305), -INT32_C( 499369017), -INT32_C( 597993881), -INT32_C( 662402031) } }, + { { -INT32_C( 999286111), INT32_C( 448217377), -INT32_C( 977134270), -INT32_C( 615754924) }, + { INT16_C( 9385), INT16_C( 6468), INT16_C( 1459), INT16_C( 1745), + -INT16_C( 5037), -INT16_C( 1963), INT16_C( 1155), -INT16_C( 4707) }, + { -INT16_C( 154), INT16_C( 1256), -INT16_C( 6295), INT16_C( 8502)}, + INT8_C( 2), + { -INT32_C(1062701941), INT32_C( 423503207), -INT32_C( 962592820), -INT32_C( 675016054) } }, + { { -INT32_C( 166972908), -INT32_C( 688547594), -INT32_C( 958428321), -INT32_C( 172358765) }, + { INT16_C( 3599), -INT16_C( 4294), INT16_C( 4651), -INT16_C( 4778), + -INT16_C( 3374), -INT16_C( 1032), -INT16_C( 469), INT16_C( 5690) }, + { INT16_C( 5429), -INT16_C( 7133), INT16_C( 4223), -INT16_C( 2009)}, + INT8_C( 2), + { -INT32_C( 138476104), -INT32_C( 679831322), -INT32_C( 954467147), -INT32_C( 220416505) } }, + { { -INT32_C( 25288675), INT32_C( 236811221), INT32_C( 833326174), -INT32_C( 591405948) }, + { -INT16_C( 6115), -INT16_C( 476), INT16_C( 6029), -INT16_C( 7903), + INT16_C( 3519), -INT16_C( 7182), -INT16_C( 346), -INT16_C( 2174) }, + { INT16_C( 1767), -INT16_C( 8420), INT16_C( 7315), INT16_C( 904)}, + INT8_C( 2), + { -INT32_C( 76771645), INT32_C( 341883881), INT32_C( 838388154), -INT32_C( 559600328) } }, + { { -INT32_C( 855336629), INT32_C( 906845897), -INT32_C( 251633756), INT32_C( 8482797) }, + { -INT16_C( 2575), -INT16_C( 3957), -INT16_C( 5600), -INT16_C( 8026), + -INT16_C( 8582), INT16_C( 4016), INT16_C( 8578), INT16_C( 1477) }, + { -INT16_C( 3356), -INT16_C( 6870), INT16_C( 2621), -INT16_C( 4607)}, + INT8_C( 2), + { -INT32_C( 810349785), INT32_C( 885794025), -INT32_C( 
296599632), INT32_C( 740363) } }, + { { INT32_C( 847514568), -INT32_C( 318708549), INT32_C( 900685617), -INT32_C( 505950930) }, + { -INT16_C( 9272), INT16_C( 4269), -INT16_C( 9927), INT16_C( 2036), + -INT16_C( 6639), INT16_C( 2316), -INT16_C( 9188), INT16_C( 7193) }, + { INT16_C( 2164), INT16_C( 7350), -INT16_C( 626), INT16_C( 5339)}, + INT8_C( 3), + { INT32_C( 918405810), -INT32_C( 343438797), INT32_C( 998795081), -INT32_C( 582757784) } }, + { { INT32_C( 804918704), -INT32_C( 524071003), -INT32_C( 986920723), INT32_C( 956365470) }, + { -INT16_C( 9578), -INT16_C( 3266), INT16_C( 9986), INT16_C( 1944), + -INT16_C( 2569), -INT16_C( 7603), INT16_C( 5080), INT16_C( 9340) }, + { -INT16_C( 6408), INT16_C( 6393), -INT16_C( 7501), -INT16_C( 8185)}, + INT8_C( 1), + { INT32_C( 837765938), -INT32_C( 426859045), -INT32_C(1051873603), INT32_C( 836944230) } }, + { { -INT32_C( 413669468), -INT32_C( 82188184), -INT32_C( 781850974), INT32_C( 570418568) }, + { INT16_C( 4896), INT16_C( 8573), INT16_C( 6139), INT16_C( 691), + -INT16_C( 6651), -INT16_C( 3345), INT16_C( 3473), -INT16_C( 9775) }, + { -INT16_C( 870), -INT16_C( 4925), -INT16_C( 9268), -INT16_C( 2864)}, + INT8_C( 0), + { -INT32_C( 425242208), -INT32_C( 88008484), -INT32_C( 775807954), INT32_C( 553410068) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_4_(simde_vqdmlsl_high_lane_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_high_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 872780517), -INT32_C( 29391408), INT32_C( 
45946525), -INT32_C( 832013542) }, + { -INT16_C( 6241), INT16_C( 4618), -INT16_C( 2647), -INT16_C( 962), + -INT16_C( 6662), INT16_C( 8291), INT16_C( 9815), -INT16_C( 8604) }, + { INT16_C( 2109), INT16_C( 5858), INT16_C( 7276), -INT16_C( 9607), + -INT16_C( 1930), INT16_C( 8982), -INT16_C( 9729), INT16_C( 7886)}, + INT8_C( 3), + { -INT32_C(1000784185), INT32_C( 129911866), INT32_C( 234531935), -INT32_C( 997330798) } }, + { { -INT32_C( 965454540), INT32_C( 656372837), -INT32_C( 660432140), INT32_C( 439902238) }, + { -INT16_C( 8190), INT16_C( 9341), INT16_C( 684), INT16_C( 5555), + -INT16_C( 4936), -INT16_C( 7543), INT16_C( 994), -INT16_C( 3774) }, + { -INT16_C( 564), -INT16_C( 8121), -INT16_C( 5137), -INT16_C( 9349), + -INT16_C( 116), INT16_C( 4315), -INT16_C( 4968), -INT16_C( 3655)}, + INT8_C( 7), + { -INT32_C(1001536700), INT32_C( 601233507), -INT32_C( 653166000), INT32_C( 412314298) } }, + { { -INT32_C( 907415859), INT32_C( 509071204), INT32_C( 710920080), INT32_C( 225257492) }, + { INT16_C( 8228), -INT16_C( 7737), INT16_C( 4393), INT16_C( 2491), + -INT16_C( 1466), -INT16_C( 8359), INT16_C( 9611), -INT16_C( 8865) }, + { INT16_C( 3460), -INT16_C( 9079), INT16_C( 7019), INT16_C( 8362), + -INT16_C( 8490), -INT16_C( 5629), INT16_C( 7713), INT16_C( 2152)}, + INT8_C( 7), + { -INT32_C( 901106195), INT32_C( 545048340), INT32_C( 669554336), INT32_C( 263412452) } }, + { { -INT32_C( 764998060), INT32_C( 455574662), INT32_C( 610461886), -INT32_C( 371876860) }, + { -INT16_C( 3285), -INT16_C( 6882), -INT16_C( 9633), INT16_C( 1704), + INT16_C( 6596), INT16_C( 4573), -INT16_C( 6582), -INT16_C( 6057) }, + { -INT16_C( 1845), -INT16_C( 3438), -INT16_C( 8109), -INT16_C( 4210), + -INT16_C( 9654), -INT16_C( 4565), -INT16_C( 6427), -INT16_C( 2142)}, + INT8_C( 1), + { -INT32_C( 719643964), INT32_C( 487018610), INT32_C( 565204054), -INT32_C( 413524792) } }, + { { -INT32_C( 587174221), INT32_C( 56513196), -INT32_C( 442232801), -INT32_C( 572018074) }, + { INT16_C( 6729), INT16_C( 1327), 
-INT16_C( 6140), INT16_C( 6552), + -INT16_C( 519), -INT16_C( 2872), INT16_C( 7615), -INT16_C( 5938) }, + { INT16_C( 8850), INT16_C( 3637), INT16_C( 1171), INT16_C( 4708), + -INT16_C( 4234), INT16_C( 249), -INT16_C( 4600), INT16_C( 9135)}, + INT8_C( 2), + { -INT32_C( 585958723), INT32_C( 63239420), -INT32_C( 460067131), -INT32_C( 558111278) } }, + { { -INT32_C( 811954240), INT32_C( 760955593), INT32_C( 368199597), INT32_C( 748371992) }, + { INT16_C( 8111), -INT16_C( 5698), -INT16_C( 3905), -INT16_C( 7403), + -INT16_C( 8976), INT16_C( 2047), INT16_C( 8951), INT16_C( 6885) }, + { INT16_C( 8865), INT16_C( 389), INT16_C( 9139), -INT16_C( 4946), + -INT16_C( 8242), INT16_C( 9625), -INT16_C( 3018), -INT16_C( 8395)}, + INT8_C( 7), + { -INT32_C( 962661280), INT32_C( 795324723), INT32_C( 518486887), INT32_C( 863971142) } }, + { { INT32_C( 666989284), INT32_C( 215517931), -INT32_C( 860315495), INT32_C( 563015594) }, + { -INT16_C( 1132), INT16_C( 1616), INT16_C( 1479), INT16_C( 6440), + INT16_C( 1118), -INT16_C( 1105), INT16_C( 2256), INT16_C( 7406) }, + { -INT16_C( 4130), INT16_C( 7931), -INT16_C( 8746), -INT16_C( 7137), + -INT16_C( 7158), -INT16_C( 3725), -INT16_C( 8565), -INT16_C( 8470)}, + INT8_C( 4), + { INT32_C( 682994572), INT32_C( 199698751), -INT32_C( 828018599), INT32_C( 669039890) } }, + { { INT32_C( 307095870), -INT32_C( 837445993), -INT32_C( 866521360), -INT32_C( 74057105) }, + { INT16_C( 4322), -INT16_C( 8839), INT16_C( 4483), -INT16_C( 9545), + INT16_C( 9300), INT16_C( 1882), -INT16_C( 2439), INT16_C( 4037) }, + { INT16_C( 3), -INT16_C( 2248), INT16_C( 1570), -INT16_C( 404), + -INT16_C( 662), -INT16_C( 7109), -INT16_C( 7322), INT16_C( 4903)}, + INT8_C( 5), + { INT32_C( 439323270), -INT32_C( 810687717), -INT32_C( 901199062), -INT32_C( 16659039) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t v = 
simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + SIMDE_CONSTIFY_8_(simde_vqdmlsl_high_laneq_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 790453197700), INT64_C( 961966501875) }, + { -INT32_C( 242762), INT32_C( 150897), -INT32_C( 329656), INT32_C( 660855) }, + { INT32_C( 395325), INT32_C( 901629), INT32_C( 925388), INT32_C( 858125)}, + INT8_C( 3), + { INT64_C( 1356225307700), -INT64_C( 172225891875) } }, + { { INT64_C( 849068137006), INT64_C( 549844025749) }, + { -INT32_C( 246423), -INT32_C( 830627), INT32_C( 769223), INT32_C( 785424) }, + { -INT32_C( 831951), INT32_C( 945136), INT32_C( 341523), -INT32_C( 325349)}, + INT8_C( 2), + { INT64_C( 323653443748), INT64_C( 13363304245) } }, + { { -INT64_C( 277194980408), INT64_C( 942116532917) }, + { -INT32_C( 817506), INT32_C( 354593), -INT32_C( 665530), INT32_C( 564673) }, + { -INT32_C( 621727), -INT32_C( 543692), INT32_C( 238057), -INT32_C( 232370)}, + INT8_C( 3), + { -INT64_C( 586493392608), INT64_C( 1204542662937) } }, + { { INT64_C( 604790769746), INT64_C( 204872498322) }, + { -INT32_C( 983351), INT32_C( 745045), INT32_C( 414096), -INT32_C( 956641) }, + { -INT32_C( 873296), -INT32_C( 180336), INT32_C( 394178), -INT32_C( 507014)}, + INT8_C( 1), + { INT64_C( 754143602258), -INT64_C( 140161124430) } }, + { { INT64_C( 553446037926), -INT64_C( 431114272015) }, + { INT32_C( 960473), INT32_C( 562842), -INT32_C( 219505), INT32_C( 667766) }, + { -INT32_C( 138937), -INT32_C( 552203), -INT32_C( 523153), INT32_C( 246820)}, + INT8_C( 1), + { INT64_C( 311023398896), INT64_C( 306370504981) } }, + { { INT64_C( 541441643880), INT64_C( 713931659611) }, + { INT32_C( 987082), INT32_C( 822929), INT32_C( 
919119), INT32_C( 971988) }, + { -INT32_C( 609984), -INT32_C( 925110), INT32_C( 821157), INT32_C( 173498)}, + INT8_C( 3), + { INT64_C( 222511027356), INT64_C( 376655711563) } }, + { { INT64_C( 816569519115), INT64_C( 970025023207) }, + { INT32_C( 303874), -INT32_C( 715866), -INT32_C( 487220), INT32_C( 61968) }, + { INT32_C( 89628), INT32_C( 361735), INT32_C( 443557), -INT32_C( 149088)}, + INT8_C( 0), + { INT64_C( 903906627435), INT64_C( 958916887399) } }, + { { INT64_C( 771564280879), -INT64_C( 493898875902) }, + { -INT32_C( 659961), INT32_C( 859353), -INT32_C( 225375), -INT32_C( 564812) }, + { -INT32_C( 515031), INT32_C( 410037), -INT32_C( 630721), -INT32_C( 183181)}, + INT8_C( 2), + { INT64_C( 487266790129), -INT64_C( 1206376454806) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_4_(simde_vqdmlsl_high_laneq_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_high_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 446900198398), -INT64_C( 329881688649) }, + { INT32_C( 974292), -INT32_C( 655559), INT32_C( 171874), INT32_C( 238794) }, + { INT32_C( 13385), -INT32_C( 765431)}, + INT8_C( 0), + { -INT64_C( 451501265378), -INT64_C( 336274204029) } }, + { { -INT64_C( 140891827426), INT64_C( 381859766505) }, + { INT32_C( 740832), -INT32_C( 598547), INT32_C( 834175), INT32_C( 79386) }, + { -INT32_C( 193689), -INT32_C( 869124)}, + INT8_C( 1), + { INT64_C( 1309111197974), INT64_C( 519852322233) } }, + { { -INT64_C( 53296665764), -INT64_C( 704121513624) }, + { INT32_C( 9898), 
-INT32_C( 171625), INT32_C( 725625), INT32_C( 204408) }, + { -INT32_C( 229616), -INT32_C( 351235)}, + INT8_C( 0), + { INT64_C( 279933554236), -INT64_C( 610250818968) } }, + { { -INT64_C( 116940462675), INT64_C( 381748739822) }, + { INT32_C( 593056), -INT32_C( 54191), INT32_C( 428212), -INT32_C( 261544) }, + { -INT32_C( 116854), INT32_C( 13856)}, + INT8_C( 1), + { -INT64_C( 128807073619), INT64_C( 388996647150) } }, + { { INT64_C( 222385528219), INT64_C( 43492618314) }, + { -INT32_C( 383545), INT32_C( 34688), -INT32_C( 843697), -INT32_C( 28956) }, + { INT32_C( 305790), -INT32_C( 708575)}, + INT8_C( 1), + { -INT64_C( 973259675331), INT64_C( 2457622914) } }, + { { INT64_C( 886360405211), INT64_C( 227244724453) }, + { -INT32_C( 857821), -INT32_C( 985197), -INT32_C( 228703), -INT32_C( 790608) }, + { -INT32_C( 42516), INT32_C( 379665)}, + INT8_C( 1), + { INT64_C( 1060021454201), INT64_C( 827577097093) } }, + { { INT64_C( 513652570910), INT64_C( 6598367959) }, + { -INT32_C( 178555), -INT32_C( 468729), INT32_C( 810266), -INT32_C( 620168) }, + { INT32_C( 540665), -INT32_C( 177714)}, + INT8_C( 0), + { -INT64_C( 362512362870), INT64_C( 677204631399) } }, + { { INT64_C( 493154627538), -INT64_C( 534064624380) }, + { INT32_C( 801415), -INT32_C( 734570), INT32_C( 494571), INT32_C( 423838) }, + { INT32_C( 310134), INT32_C( 105922)}, + INT8_C( 1), + { INT64_C( 388382728614), -INT64_C( 623852161652) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + SIMDE_CONSTIFY_2_(simde_vqdmlsl_high_lane_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_lane_s16) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_lane_s32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_laneq_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlsl_high_n.c b/test/arm/neon/qdmlsl_high_n.c new file mode 100644 index 000000000..b83f2d42f --- /dev/null +++ b/test/arm/neon/qdmlsl_high_n.c @@ -0,0 +1,127 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlsl_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlsl_high_n.h" + +static int +test_simde_vqdmlsl_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[8]; + int16_t c; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 655584482), -INT32_C( 721040519), -INT32_C( 4516362), INT32_C( 510063637) }, + { -INT16_C( 5717), INT16_C( 6177), INT16_C( 153), -INT16_C( 1588), + -INT16_C( 6665), INT16_C( 6259), INT16_C( 3384), INT16_C( 8410) }, + -INT16_C( 9983), + { -INT32_C( 788657872), -INT32_C( 596073325), INT32_C( 63048582), INT32_C( 677977697) } }, + { { INT32_C( 443613837), -INT32_C( 161043501), INT32_C( 110704618), -INT32_C( 962257269) }, + { INT16_C( 59), -INT16_C( 1856), INT16_C( 4217), INT16_C( 3280), + -INT16_C( 6369), -INT16_C( 8539), INT16_C( 8599), -INT16_C( 1395) }, + -INT16_C( 6689), + { INT32_C( 358409355), -INT32_C( 275278243), INT32_C( 225742040), -INT32_C( 980919579) } }, + { { -INT32_C( 851285066), INT32_C( 392036614), -INT32_C( 390856728), -INT32_C( 522161257) }, + { -INT16_C( 1292), -INT16_C( 7545), INT16_C( 2123), INT16_C( 132), + INT16_C( 4355), INT16_C( 5159), -INT16_C( 3713), INT16_C( 4367) }, + -INT16_C( 8562), + { -INT32_C( 776710046), INT32_C( 480379330), -INT32_C( 454438140), -INT32_C( 447380749) } }, + { { INT32_C( 528779912), INT32_C( 848404208), INT32_C( 109131611), -INT32_C( 774639392) }, + { -INT16_C( 3475), INT16_C( 3429), INT16_C( 3780), -INT16_C( 9623), + -INT16_C( 9851), INT16_C( 5785), -INT16_C( 5027), -INT16_C( 2381) }, + INT16_C( 3357), + { 
INT32_C( 594919526), INT32_C( 809563718), INT32_C( 142882889), -INT32_C( 758653358) } }, + { { INT32_C( 195157916), -INT32_C( 601064983), INT32_C( 339151195), INT32_C( 530998669) }, + { -INT16_C( 4660), -INT16_C( 7367), -INT16_C( 3008), INT16_C( 517), + INT16_C( 2165), INT16_C( 7141), INT16_C( 5136), -INT16_C( 5170) }, + -INT16_C( 8388), + { INT32_C( 231477956), -INT32_C( 481267567), INT32_C( 425312731), INT32_C( 444266749) } }, + { { -INT32_C( 300840527), -INT32_C( 869355563), INT32_C( 235756236), -INT32_C( 556959564) }, + { -INT16_C( 4759), -INT16_C( 2892), -INT16_C( 4998), -INT16_C( 970), + INT16_C( 3072), INT16_C( 8931), -INT16_C( 6658), INT16_C( 3886) }, + -INT16_C( 558), + { -INT32_C( 297412175), -INT32_C( 859388567), INT32_C( 228325908), -INT32_C( 552622788) } }, + { { INT32_C( 889226968), INT32_C( 577849988), INT32_C( 511737493), INT32_C( 820971206) }, + { INT16_C( 2514), -INT16_C( 9068), -INT16_C( 8024), -INT16_C( 6544), + -INT16_C( 9798), -INT16_C( 4286), -INT16_C( 8686), INT16_C( 9288) }, + INT16_C( 8725), + { INT32_C( 1060202068), INT32_C( 652640688), INT32_C( 663308193), INT32_C( 658895606) } }, + { { INT32_C( 999975609), INT32_C( 62995935), INT32_C( 262692963), INT32_C( 517089321) }, + { INT16_C( 6627), INT16_C( 7183), -INT16_C( 4468), INT16_C( 2605), + -INT16_C( 9265), INT16_C( 3099), -INT16_C( 1349), -INT16_C( 6470) }, + -INT16_C( 6407), + { INT32_C( 881253899), INT32_C( 102706521), INT32_C( 245406877), INT32_C( 434182741) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + int16_t c = test_vec[i].c; + simde_int32x4_t r = simde_vqdmlsl_high_n_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[4]; + int32_t c; + int64_t 
r[2]; + } test_vec[] = { + { { -INT64_C( 924633394176), -INT64_C( 580110049271) }, + { INT32_C( 794833), INT32_C( 660637), -INT32_C( 707676), INT32_C( 154834) }, + INT32_C( 120564), + { -INT64_C( 753992895648), -INT64_C( 617444862023) } }, + { { INT64_C( 582717418070), -INT64_C( 391739747873) }, + { -INT32_C( 520175), -INT32_C( 988116), INT32_C( 599353), -INT32_C( 906696) }, + -INT32_C( 350146), + { INT64_C( 1002439529146), -INT64_C( 1026691703105) } }, + { { -INT64_C( 123627170620), -INT64_C( 222977168930) }, + { -INT32_C( 844801), INT32_C( 71993), INT32_C( 530543), -INT32_C( 733850) }, + INT32_C( 147888), + { -INT64_C( 280549056988), -INT64_C( 5921951330) } }, + { { INT64_C( 863371662593), INT64_C( 776848419902) }, + { -INT32_C( 813235), -INT32_C( 882166), INT32_C( 482897), -INT32_C( 956511) }, + INT32_C( 977218), + { -INT64_C( 80419618499), INT64_C( 2646287952698) } }, + { { -INT64_C( 255406064309), -INT64_C( 777654343951) }, + { INT32_C( 140558), -INT32_C( 856574), -INT32_C( 425078), -INT32_C( 578417) }, + -INT32_C( 948359), + { -INT64_C( 1061659158313), -INT64_C( 1874748279357) } }, + { { -INT64_C( 458810764855), INT64_C( 711787430179) }, + { INT32_C( 805923), -INT32_C( 468847), INT32_C( 690525), INT32_C( 713403) }, + INT32_C( 640126), + { -INT64_C( 1342856777155), -INT64_C( 201548187377) } }, + { { INT64_C( 758182015630), INT64_C( 506525147029) }, + { INT32_C( 830864), -INT32_C( 705600), -INT32_C( 726060), -INT32_C( 605205) }, + INT32_C( 945162), + { INT64_C( 2130670659070), INT64_C( 1650558683449) } }, + { { INT64_C( 128332029081), -INT64_C( 342906652550) }, + { -INT32_C( 664547), -INT32_C( 618870), -INT32_C( 428918), -INT32_C( 969644) }, + INT32_C( 907914), + { INT64_C( 907173343185), INT64_C( 1417800072682) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + int32_t c = test_vec[i].c; + simde_int64x2_t r = 
simde_vqdmlsl_high_n_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_high_n_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlsl_lane.c b/test/arm/neon/qdmlsl_lane.c new file mode 100644 index 000000000..0f1fb5f5f --- /dev/null +++ b/test/arm/neon/qdmlsl_lane.c @@ -0,0 +1,593 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlsl_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlsl_lane.h" + +SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ + +static int +test_simde_vqdmlslh_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int16_t b[1]; + int16_t v[4]; + int8_t lane; + int32_t r[1]; + } test_vec[] = { + { { -INT32_C( 521768223) }, + { -INT16_C( 1891) }, + { INT16_C( 4436), INT16_C( 3105), -INT16_C( 2887), INT16_C( 1765)}, + INT8_C( 2), + { -INT32_C( 532686857) } }, + { { INT32_C( 958870647) }, + { INT16_C( 3721) }, + { INT16_C( 7881), -INT16_C( 4769), INT16_C( 862), -INT16_C( 5799)}, + INT8_C( 2), + { INT32_C( 952455643) } }, + { { INT32_C( 596132412) }, + { INT16_C( 845) }, + { -INT16_C( 6428), INT16_C( 9822), -INT16_C( 9929), INT16_C( 7367)}, + INT8_C( 2), + { INT32_C( 612912422) } }, + { { -INT32_C( 230553326) }, + { INT16_C( 7874) }, + { -INT16_C( 5546), INT16_C( 7556), -INT16_C( 3520), -INT16_C( 2713)}, + INT8_C( 0), + { -INT32_C( 143214918) } }, + { { INT32_C( 851397549) }, + { INT16_C( 333) }, + { INT16_C( 8762), -INT16_C( 4955), INT16_C( 3059), INT16_C( 7156)}, + INT8_C( 3), + { INT32_C( 846631653) } }, + { { INT32_C( 476045229) }, + { INT16_C( 3731) }, + { -INT16_C( 6899), -INT16_C( 5316), -INT16_C( 5132), INT16_C( 2354)}, + INT8_C( 3), + { INT32_C( 458479681) } }, + { { -INT32_C( 8428601) }, + { -INT16_C( 7758) }, + { -INT16_C( 3068), -INT16_C( 1053), -INT16_C( 5872), INT16_C( 5349)}, + INT8_C( 0), + { 
-INT32_C( 56031689) } }, + { { INT32_C( 391924831) }, + { -INT16_C( 575) }, + { INT16_C( 7185), -INT16_C( 7316), -INT16_C( 2257), INT16_C( 3710)}, + INT8_C( 3), + { INT32_C( 396191331) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + int32_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlslh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlslh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + case 2: r = simde_vqdmlslh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 2); break; + case 3: r = simde_vqdmlslh_lane_s16(test_vec[i].a[0], test_vec[i].b[0], v, 3); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i32(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlsls_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int32_t b[1]; + int32_t v[2]; + int8_t lane; + int64_t r[1]; + } test_vec[] = { + { { -INT64_C( 17908307265) }, + { -INT32_C( 901936) }, + { INT32_C( 494783), -INT32_C( 387685)}, + INT8_C( 0), + { INT64_C( 874616892511) } }, + { { -INT64_C( 301361331707) }, + { -INT32_C( 564541) }, + { -INT32_C( 210620), INT32_C( 132803)}, + INT8_C( 1), + { -INT64_C( 151415854861) } }, + { { -INT64_C( 970902083330) }, + { -INT32_C( 541204) }, + { INT32_C( 568552), -INT32_C( 652520)}, + INT8_C( 0), + { -INT64_C( 355496850114) } }, + { { -INT64_C( 909008067967) }, + { -INT32_C( 316442) }, + { INT32_C( 262937), -INT32_C( 816748)}, + INT8_C( 0), + { -INT64_C( 742599447659) } }, + { { INT64_C( 254776768658) }, + { INT32_C( 405732) }, + { INT32_C( 689187), -INT32_C( 933829)}, + INT8_C( 0), + { -INT64_C( 304473671110) } }, + { { -INT64_C( 445641548478) }, + { INT32_C( 520506) }, + { INT32_C( 918585), INT32_C( 262103)}, + INT8_C( 0), + { -INT64_C( 1401899556498) } }, + { { INT64_C( 325969795255) }, + { -INT32_C( 621275) }, + { -INT32_C( 363058), INT32_C( 
155048)}, + INT8_C( 1), + { INT64_C( 518624687655) } }, + { { -INT64_C( 684284349157) }, + { INT32_C( 663436) }, + { INT32_C( 158891), -INT32_C( 5388)}, + INT8_C( 1), + { -INT64_C( 677135162821) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + int64_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlsls_lane_s32(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlsls_lane_s32(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i64(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlslh_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int16_t b[1]; + int16_t v[8]; + int8_t lane; + int32_t r[1]; + } test_vec[] = { + { { -INT32_C( 939501449) }, + { -INT16_C( 4649) }, + { INT16_C( 6426), INT16_C( 6519), -INT16_C( 3709), INT16_C( 4272), + INT16_C( 5523), -INT16_C( 3752), -INT16_C( 8267), -INT16_C( 5596)}, + INT8_C( 5), + { -INT32_C( 974387545) } }, + { { INT32_C( 67701807) }, + { -INT16_C( 95) }, + { INT16_C( 4742), INT16_C( 715), INT16_C( 9336), INT16_C( 1391), + INT16_C( 2321), -INT16_C( 362), -INT16_C( 7284), -INT16_C( 5872)}, + INT8_C( 6), + { INT32_C( 66317847) } }, + { { -INT32_C( 459149237) }, + { -INT16_C( 2172) }, + { INT16_C( 174), -INT16_C( 5623), INT16_C( 484), INT16_C( 1234), + -INT16_C( 2341), -INT16_C( 5297), -INT16_C( 6531), -INT16_C( 6876)}, + INT8_C( 3), + { -INT32_C( 453788741) } }, + { { INT32_C( 152555937) }, + { -INT16_C( 9984) }, + { INT16_C( 501), INT16_C( 3250), -INT16_C( 5883), -INT16_C( 2978), + -INT16_C( 4675), INT16_C( 5875), -INT16_C( 6413), -INT16_C( 6884)}, + INT8_C( 1), + { INT32_C( 217451937) } }, + { { INT32_C( 903005488) }, + { -INT16_C( 8056) }, + { -INT16_C( 2229), INT16_C( 4275), INT16_C( 6721), -INT16_C( 9742), + INT16_C( 8103), -INT16_C( 3583), -INT16_C( 2568), INT16_C( 4540)}, + 
INT8_C( 4), + { INT32_C( 1033561024) } }, + { { -INT32_C( 788810551) }, + { INT16_C( 7749) }, + { INT16_C( 6311), -INT16_C( 3808), INT16_C( 3622), INT16_C( 344), + INT16_C( 1495), -INT16_C( 126), -INT16_C( 1394), -INT16_C( 5015)}, + INT8_C( 5), + { -INT32_C( 786857803) } }, + { { -INT32_C( 941983580) }, + { INT16_C( 3786) }, + { -INT16_C( 9783), INT16_C( 7609), INT16_C( 7290), -INT16_C( 7255), + INT16_C( 8817), INT16_C( 8016), -INT16_C( 8645), -INT16_C( 7114)}, + INT8_C( 2), + { -INT32_C( 997183460) } }, + { { -INT32_C( 768054624) }, + { INT16_C( 6573) }, + { -INT16_C( 3944), INT16_C( 1034), -INT16_C( 8121), INT16_C( 9965), + INT16_C( 6007), -INT16_C( 748), -INT16_C( 8042), -INT16_C( 6337)}, + INT8_C( 1), + { -INT32_C( 781647588) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + int32_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + case 2: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 2); break; + case 3: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 3); break; + case 4: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 4); break; + case 5: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 5); break; + case 6: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 6); break; + case 7: r = simde_vqdmlslh_laneq_s16(test_vec[i].a[0], test_vec[i].b[0], v, 7); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + } + + simde_assert_equal_i32(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlsls_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int32_t b[1]; + int32_t v[4]; + int8_t lane; + int64_t r[1]; + } test_vec[] = { + { { -INT64_C( 500450126315) }, + { INT32_C( 
524413) }, + { -INT32_C( 742918), INT32_C( 466620), INT32_C( 225935), -INT32_C( 166188)}, + INT8_C( 1), + { -INT64_C( 989853314435) } }, + { { -INT64_C( 132211311684) }, + { INT32_C( 403420) }, + { -INT32_C( 690715), INT32_C( 295053), INT32_C( 615254), INT32_C( 965539)}, + INT8_C( 0), + { INT64_C( 425085178916) } }, + { { INT64_C( 829159715571) }, + { -INT32_C( 818481) }, + { INT32_C( 296998), -INT32_C( 936881), INT32_C( 516497), INT32_C( 655216)}, + INT8_C( 3), + { INT64_C( 1901723409363) } }, + { { -INT64_C( 538012455356) }, + { -INT32_C( 534254) }, + { INT32_C( 237258), INT32_C( 190484), -INT32_C( 87731), -INT32_C( 580229)}, + INT8_C( 2), + { -INT64_C( 631753730704) } }, + { { -INT64_C( 86569196546) }, + { INT32_C( 634638) }, + { INT32_C( 249039), -INT32_C( 868967), -INT32_C( 513773), -INT32_C( 660315)}, + INT8_C( 3), + { INT64_C( 751552785394) } }, + { { -INT64_C( 421340522093) }, + { -INT32_C( 625640) }, + { INT32_C( 950739), INT32_C( 725055), INT32_C( 936872), -INT32_C( 330093)}, + INT8_C( 2), + { INT64_C( 750948674067) } }, + { { -INT64_C( 352349278764) }, + { -INT32_C( 619512) }, + { INT32_C( 446726), INT32_C( 703647), INT32_C( 180352), INT32_C( 457912)}, + INT8_C( 1), + { INT64_C( 519486241764) } }, + { { -INT64_C( 881661908384) }, + { -INT32_C( 171477) }, + { -INT32_C( 467044), -INT32_C( 608804), -INT32_C( 160391), -INT32_C( 839703)}, + INT8_C( 1), + { -INT64_C( 1090453675400) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + int64_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlsls_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 0); break; + case 1: r = simde_vqdmlsls_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 1); break; + case 2: r = simde_vqdmlsls_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 2); break; + case 3: r = simde_vqdmlsls_laneq_s32(test_vec[i].a[0], test_vec[i].b[0], v, 3); break; + default: HEDLEY_UNREACHABLE(); r = 0; break; + 
} + + simde_assert_equal_i64(r, test_vec[i].r[0]); + } + + return 0; +} + +static int +test_simde_vqdmlsl_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 621980471), -INT32_C( 848186675), -INT32_C( 509036072), INT32_C( 748012096) }, + { -INT16_C( 9060), -INT16_C( 7835), INT16_C( 5740), INT16_C( 7999) }, + { -INT16_C( 3714), INT16_C( 8214), -INT16_C( 7570), -INT16_C( 6376)}, + INT8_C( 3), + { -INT32_C( 737513591), -INT32_C( 948098595), -INT32_C( 435839592), INT32_C( 850015344) } }, + { { INT32_C( 701138315), INT32_C( 599017440), INT32_C( 406294106), INT32_C( 912157267) }, + { -INT16_C( 8720), -INT16_C( 9909), -INT16_C( 3317), -INT16_C( 9314) }, + { INT16_C( 4644), INT16_C( 6314), INT16_C( 8460), INT16_C( 9616)}, + INT8_C( 2), + { INT32_C( 848680715), INT32_C( 766677720), INT32_C( 462417746), INT32_C( 1069750147) } }, + { { INT32_C( 832658663), -INT32_C( 214972206), INT32_C( 752911172), -INT32_C( 348332696) }, + { INT16_C( 7944), INT16_C( 4659), -INT16_C( 5401), INT16_C( 6379) }, + { -INT16_C( 1071), -INT16_C( 9321), INT16_C( 8232), -INT16_C( 4878)}, + INT8_C( 3), + { INT32_C( 910160327), -INT32_C( 169519002), INT32_C( 700219016), -INT32_C( 286099172) } }, + { { INT32_C( 635065763), INT32_C( 340022238), -INT32_C( 340772832), -INT32_C( 659109761) }, + { INT16_C( 9265), INT16_C( 9751), INT16_C( 9161), -INT16_C( 4242) }, + { INT16_C( 6611), -INT16_C( 4974), -INT16_C( 9605), -INT16_C( 7983)}, + INT8_C( 1), + { INT32_C( 727233983), INT32_C( 437025186), -INT32_C( 249639204), -INT32_C( 701309177) } }, + { { INT32_C( 489256195), -INT32_C( 915698286), INT32_C( 19849451), INT32_C( 925310967) }, + { INT16_C( 2139), INT16_C( 1927), INT16_C( 954), INT16_C( 4965) }, + { INT16_C( 5661), -INT16_C( 1116), INT16_C( 7688), -INT16_C( 3366)}, + INT8_C( 1), + { INT32_C( 494030443), -INT32_C( 911397222), INT32_C( 21978779), INT32_C( 936392847) } }, + { { INT32_C( 
375425665), -INT32_C( 850941037), INT32_C( 673625225), INT32_C( 992218818) }, + { INT16_C( 8491), INT16_C( 1184), -INT16_C( 8612), -INT16_C( 5764) }, + { -INT16_C( 1111), INT16_C( 9806), -INT16_C( 5898), INT16_C( 1455)}, + INT8_C( 3), + { INT32_C( 350716855), -INT32_C( 854386477), INT32_C( 698686145), INT32_C( 1008992058) } }, + { { -INT32_C( 244188012), INT32_C( 236430154), INT32_C( 959399976), -INT32_C( 587410301) }, + { INT16_C( 1313), INT16_C( 6408), -INT16_C( 6082), INT16_C( 3305) }, + { INT16_C( 5128), -INT16_C( 3587), -INT16_C( 7384), -INT16_C( 299)}, + INT8_C( 2), + { -INT32_C( 224797628), INT32_C( 331063498), INT32_C( 869581000), -INT32_C( 538602061) } }, + { { -INT32_C( 439264700), INT32_C( 924081299), INT32_C( 127569051), INT32_C( 914997609) }, + { -INT16_C( 430), INT16_C( 3269), INT16_C( 5140), INT16_C( 7867) }, + { -INT16_C( 3308), INT16_C( 7584), INT16_C( 3613), INT16_C( 1332)}, + INT8_C( 2), + { -INT32_C( 436157520), INT32_C( 900459505), INT32_C( 90427411), INT32_C( 858150667) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlsl_lane_s16(a, b, v, 0); break; + case 1: r = simde_vqdmlsl_lane_s16(a, b, v, 1); break; + case 2: r = simde_vqdmlsl_lane_s16(a, b, v, 2); break; + case 3: r = simde_vqdmlsl_lane_s16(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 358287139107), INT64_C( 431713426998) }, + { INT32_C( 965537), INT32_C( 901602) }, + { 
INT32_C( 553527), INT32_C( 592565)}, + INT8_C( 1), + { -INT64_C( 785999725703), -INT64_C( 636802151262) } }, + { { INT64_C( 673223528167), INT64_C( 297497561012) }, + { -INT32_C( 449165), INT32_C( 858137) }, + { -INT32_C( 242074), -INT32_C( 654769)}, + INT8_C( 1), + { INT64_C( 85024892397), INT64_C( 1421260571718) } }, + { { -INT64_C( 388620023028), -INT64_C( 263834014828) }, + { -INT32_C( 23441), -INT32_C( 106279) }, + { -INT32_C( 574815), INT32_C( 419135)}, + INT8_C( 0), + { -INT64_C( 415568499858), -INT64_C( 386015541598) } }, + { { INT64_C( 951671555991), INT64_C( 736051930158) }, + { INT32_C( 807824), INT32_C( 478972) }, + { INT32_C( 611136), INT32_C( 507977)}, + INT8_C( 0), + { -INT64_C( 35709100137), INT64_C( 150617865774) } }, + { { -INT64_C( 191997325980), -INT64_C( 306244833898) }, + { -INT32_C( 506328), INT32_C( 665384) }, + { INT32_C( 765803), -INT32_C( 873513)}, + INT8_C( 1), + { -INT64_C( 1076565506508), INT64_C( 856198314086) } }, + { { INT64_C( 750098780084), -INT64_C( 354288298011) }, + { -INT32_C( 755871), -INT32_C( 477052) }, + { INT32_C( 218455), -INT32_C( 2723)}, + INT8_C( 1), + { INT64_C( 745982306618), -INT64_C( 356886323203) } }, + { { -INT64_C( 572018600681), -INT64_C( 948099495092) }, + { -INT32_C( 451512), INT32_C( 380722) }, + { -INT32_C( 372983), -INT32_C( 375407)}, + INT8_C( 0), + { -INT64_C( 908831201273), -INT64_C( 664093827640) } }, + { { INT64_C( 801129221619), -INT64_C( 673441488474) }, + { INT32_C( 868112), INT32_C( 30583) }, + { -INT32_C( 495740), -INT32_C( 746578)}, + INT8_C( 0), + { INT64_C( 1661844907379), -INT64_C( 643119055634) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlsl_lane_s32(a, b, v, 0); break; + case 1: r = simde_vqdmlsl_lane_s32(a, b, v, 
1); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s64(0); break; + } + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 576260820), INT32_C( 326390657), -INT32_C( 621853039), -INT32_C( 884742225) }, + { INT16_C( 2818), INT16_C( 6712), INT16_C( 5722), INT16_C( 4321) }, + { INT16_C( 6648), INT16_C( 8790), INT16_C( 6169), -INT16_C( 6194), + -INT16_C( 1520), -INT16_C( 9622), -INT16_C( 3104), INT16_C( 8027)}, + INT8_C( 2), + { INT32_C( 541492336), INT32_C( 243578001), -INT32_C( 692451075), -INT32_C( 938054723) } }, + { { -INT32_C( 697467647), INT32_C( 598941739), -INT32_C( 363565604), INT32_C( 526836150) }, + { INT16_C( 6695), -INT16_C( 3263), INT16_C( 2485), INT16_C( 2450) }, + { -INT16_C( 5334), -INT16_C( 2962), INT16_C( 702), INT16_C( 3354), + -INT16_C( 8399), INT16_C( 4470), INT16_C( 8232), INT16_C( 1471)}, + INT8_C( 1), + { -INT32_C( 657806467), INT32_C( 579611727), -INT32_C( 348844464), INT32_C( 541349950) } }, + { { INT32_C( 16745378), -INT32_C( 643200730), -INT32_C( 553632268), INT32_C( 662966256) }, + { -INT16_C( 68), INT16_C( 3722), INT16_C( 2403), INT16_C( 5102) }, + { -INT16_C( 302), -INT16_C( 5801), INT16_C( 591), -INT16_C( 1556), + -INT16_C( 5794), INT16_C( 1883), -INT16_C( 1143), INT16_C( 8514)}, + INT8_C( 3), + { INT32_C( 16533762), -INT32_C( 631617866), -INT32_C( 546154132), INT32_C( 678843680) } }, + { { -INT32_C( 427278683), -INT32_C( 105903585), INT32_C( 41252604), -INT32_C( 318817836) }, + { -INT16_C( 7528), -INT16_C( 2955), -INT16_C( 7668), INT16_C( 3010) }, + { INT16_C( 6156), INT16_C( 9935), INT16_C( 7913), INT16_C( 4232), + -INT16_C( 4512), INT16_C( 6480), -INT16_C( 5793), INT16_C( 7771)}, + INT8_C( 3), + { -INT32_C( 363561691), -INT32_C( 80892465), INT32_C( 106154556), -INT32_C( 
344294476) } }, + { { INT32_C( 88945249), -INT32_C( 272013243), INT32_C( 678093194), INT32_C( 755403207) }, + { INT16_C( 6957), -INT16_C( 2125), INT16_C( 3610), -INT16_C( 4205) }, + { -INT16_C( 5280), -INT16_C( 6119), INT16_C( 3261), INT16_C( 4591), + -INT16_C( 9342), -INT16_C( 9870), INT16_C( 939), -INT16_C( 7955)}, + INT8_C( 6), + { INT32_C( 75880003), -INT32_C( 268022493), INT32_C( 671313614), INT32_C( 763300197) } }, + { { INT32_C( 693892333), -INT32_C( 781710600), INT32_C( 285172511), INT32_C( 809047430) }, + { -INT16_C( 214), INT16_C( 7779), INT16_C( 6711), INT16_C( 8023) }, + { -INT16_C( 5595), INT16_C( 9254), INT16_C( 5893), INT16_C( 3207), + INT16_C( 6157), -INT16_C( 1793), -INT16_C( 3084), INT16_C( 6516)}, + INT8_C( 5), + { INT32_C( 693124929), -INT32_C( 753815106), INT32_C( 309238157), INT32_C( 837817908) } }, + { { -INT32_C( 403691731), INT32_C( 112238564), -INT32_C( 325269856), -INT32_C( 967736266) }, + { -INT16_C( 6885), -INT16_C( 7784), -INT16_C( 6629), INT16_C( 7601) }, + { INT16_C( 6518), INT16_C( 1723), -INT16_C( 2663), INT16_C( 4164), + INT16_C( 8103), -INT16_C( 4645), -INT16_C( 7864), INT16_C( 9689)}, + INT8_C( 3), + { -INT32_C( 346353451), INT32_C( 177063716), -INT32_C( 270063544), -INT32_C( 1031037394) } }, + { { -INT32_C( 570543552), INT32_C( 27439576), -INT32_C( 819157093), INT32_C( 278644282) }, + { INT16_C( 6777), -INT16_C( 9799), INT16_C( 5995), -INT16_C( 2829) }, + { INT16_C( 3230), -INT16_C( 5974), -INT16_C( 189), INT16_C( 253), + INT16_C( 7940), -INT16_C( 8193), INT16_C( 2977), -INT16_C( 9125)}, + INT8_C( 6), + { -INT32_C( 610893810), INT32_C( 85782822), -INT32_C( 854851323), INT32_C( 295488148) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlsl_laneq_s16(a, b, v, 
0); break; + case 1: r = simde_vqdmlsl_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vqdmlsl_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vqdmlsl_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vqdmlsl_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vqdmlsl_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vqdmlsl_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vqdmlsl_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT64_C( 692285164033), -INT64_C( 144355368083) }, + { -INT32_C( 452790), INT32_C( 12539) }, + { INT32_C( 796583), -INT32_C( 614228), INT32_C( 678045), -INT32_C( 459061)}, + INT8_C( 1), + { INT64_C( 136052571793), -INT64_C( 128951758299) } }, + { { INT64_C( 347571409953), -INT64_C( 218176683416) }, + { -INT32_C( 887657), INT32_C( 464196) }, + { -INT32_C( 542531), -INT32_C( 303694), -INT32_C( 136429), -INT32_C( 503898)}, + INT8_C( 2), + { INT64_C( 105367096247), -INT64_C( 91517091248) } }, + { { INT64_C( 961419941818), INT64_C( 500579550915) }, + { INT32_C( 538554), -INT32_C( 599232) }, + { INT32_C( 47934), -INT32_C( 850535), -INT32_C( 71916), -INT32_C( 709554)}, + INT8_C( 3), + { INT64_C( 1725686231650), -INT64_C( 349795374141) } }, + { { -INT64_C( 857911950622), INT64_C( 346094269849) }, + { INT32_C( 962763), -INT32_C( 580545) }, + { INT32_C( 985816), INT32_C( 46854), INT32_C( 327618), INT32_C( 618943)}, + INT8_C( 3), + { -INT64_C( 2049702789640), INT64_C( 1064742797719) } }, + { { INT64_C( 287577232554), -INT64_C( 128173901668) }, + { INT32_C( 285174), INT32_C( 14178) }, + { -INT32_C( 309901), -INT32_C( 768787), -INT32_C( 22523), -INT32_C( 178443)}, + INT8_C( 0), + { INT64_C( 464328648102), 
-INT64_C( 119386348912) } }, + { { -INT64_C( 645061444509), -INT64_C( 246985654113) }, + { INT32_C( 135819), -INT32_C( 966767) }, + { INT32_C( 295862), INT32_C( 973630), -INT32_C( 900353), -INT32_C( 748632)}, + INT8_C( 1), + { -INT64_C( 909536350449), INT64_C( 1635561054307) } }, + { { INT64_C( 335016087460), INT64_C( 178424854943) }, + { INT32_C( 131115), INT32_C( 783497) }, + { INT32_C( 280843), INT32_C( 801145), -INT32_C( 720619), INT32_C( 353365)}, + INT8_C( 1), + { INT64_C( 124931834110), -INT64_C( 1076964553187) } }, + { { -INT64_C( 425709782755), INT64_C( 249067519718) }, + { INT32_C( 458791), -INT32_C( 504729) }, + { INT32_C( 746888), -INT32_C( 42237), -INT32_C( 62401), INT32_C( 286730)}, + INT8_C( 3), + { -INT64_C( 688808069615), INT64_C( 538509412058) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + switch(test_vec[i].lane) { + case 0: r = simde_vqdmlsl_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vqdmlsl_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vqdmlsl_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vqdmlsl_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s64(0); break; + } + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlslh_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsls_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlslh_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsls_laneq_s32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_laneq_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmlsl_n.c 
b/test/arm/neon/qdmlsl_n.c new file mode 100644 index 000000000..b3a49d8a3 --- /dev/null +++ b/test/arm/neon/qdmlsl_n.c @@ -0,0 +1,119 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmlsl_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmlsl_n.h" + +static int +test_simde_vqdmlsl_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t b[4]; + int16_t c; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 935711740), -INT32_C( 174833550), INT32_C( 106453999), INT32_C( 245894372) }, + { INT16_C( 1965), INT16_C( 1506), INT16_C( 2690), INT16_C( 4335) }, + -INT16_C( 2827), + { -INT32_C( 924601630), -INT32_C( 166318626), INT32_C( 121663259), INT32_C( 270404462) } }, + { { -INT32_C( 313218121), INT32_C( 514593261), INT32_C( 113708453), -INT32_C( 457187134) }, + { INT16_C( 7234), -INT16_C( 2542), INT16_C( 3459), INT16_C( 1686) }, + INT16_C( 4704), + { -INT32_C( 381275593), INT32_C( 538508397), INT32_C( 81166181), -INT32_C( 473049022) } }, + { { INT32_C( 759935558), -INT32_C( 626391621), INT32_C( 615534132), -INT32_C( 892142678) }, + { INT16_C( 6619), -INT16_C( 4481), -INT16_C( 3925), -INT16_C( 6032) }, + -INT16_C( 9956), + { INT32_C( 891733086), -INT32_C( 715617293), INT32_C( 537379532), -INT32_C( 1012251862) } }, + { { INT32_C( 818873993), -INT32_C( 234231128), INT32_C( 355806293), INT32_C( 207837591) }, + { -INT16_C( 8958), -INT16_C( 2651), -INT16_C( 4422), INT16_C( 715) }, + -INT16_C( 583), + { INT32_C( 808428965), -INT32_C( 237322194), INT32_C( 350650241), INT32_C( 208671281) } }, + { { INT32_C( 189083357), -INT32_C( 239902463), -INT32_C( 710218023), INT32_C( 840478160) }, + { -INT16_C( 7670), -INT16_C( 3502), INT16_C( 7539), INT16_C( 9706) }, + -INT16_C( 5400), + { INT32_C( 106247357), -INT32_C( 277724063), -INT32_C( 628796823), INT32_C( 945302960) } }, + { { -INT32_C( 502897175), -INT32_C( 980374196), -INT32_C( 10005067), -INT32_C( 375058653) }, + { INT16_C( 1640), -INT16_C( 4598), INT16_C( 9885), INT16_C( 8382) }, + -INT16_C( 8565), + 
{ -INT32_C( 474803975), -INT32_C( 1059137936), INT32_C( 159324983), -INT32_C( 231474993) } }, + { { INT32_C( 229618807), -INT32_C( 935038912), -INT32_C( 116926710), -INT32_C( 488341902) }, + { -INT16_C( 121), -INT16_C( 657), INT16_C( 7582), INT16_C( 8753) }, + INT16_C( 7378), + { INT32_C( 231404283), -INT32_C( 925344220), -INT32_C( 228806702), -INT32_C( 617501170) } }, + { { INT32_C( 318577636), INT32_C( 142621378), INT32_C( 303755248), -INT32_C( 344132925) }, + { -INT16_C( 9606), -INT16_C( 5322), -INT16_C( 1423), -INT16_C( 2335) }, + -INT16_C( 7502), + { INT32_C( 174449212), INT32_C( 62770090), INT32_C( 282404556), -INT32_C( 379167265) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + int16_t c = test_vec[i].c; + simde_int32x4_t r = simde_vqdmlsl_n_s16(a, b, c); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmlsl_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t b[2]; + int32_t c; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 200123535528), -INT64_C( 4196102740) }, + { INT32_C( 232016), -INT32_C( 55014) }, + -INT32_C( 379867), + { -INT64_C( 23853091784), -INT64_C( 45992109016) } }, + { { -INT64_C( 769211607541), -INT64_C( 516930045720) }, + { INT32_C( 82118), -INT32_C( 235106) }, + INT32_C( 231882), + { -INT64_C( 807294979693), -INT64_C( 407896346736) } }, + { { -INT64_C( 356278515063), -INT64_C( 966670001608) }, + { INT32_C( 495024), -INT32_C( 81655) }, + -INT32_C( 478421), + { INT64_C( 117381239145), -INT64_C( 1044800935118) } }, + { { INT64_C( 323130192131), -INT64_C( 909576347809) }, + { -INT32_C( 876218), -INT32_C( 242467) }, + INT32_C( 984152), + { INT64_C( 2047793586403), -INT64_C( 432327581841) } }, + { { -INT64_C( 354762330196), -INT64_C( 733420261758) }, + { INT32_C( 770483), INT32_C( 57616) 
}, + INT32_C( 722037), + { -INT64_C( 1467396797938), -INT64_C( 816622029342) } }, + { { INT64_C( 804115288272), INT64_C( 931414127562) }, + { INT32_C( 709174), INT32_C( 19060) }, + -INT32_C( 631167), + { INT64_C( 1699329740388), INT64_C( 955474213602) } }, + { { INT64_C( 583614899765), INT64_C( 20498852161) }, + { INT32_C( 98775), -INT32_C( 624312) }, + -INT32_C( 609430), + { INT64_C( 704007796265), -INT64_C( 740450072159) } }, + { { INT64_C( 509016843325), -INT64_C( 340472500451) }, + { -INT32_C( 331014), -INT32_C( 715967) }, + INT32_C( 784785), + { INT64_C( 1028566487305), INT64_C( 783287823739) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + int32_t c = test_vec[i].c; + simde_int64x2_t r = simde_vqdmlsl_n_s32(a, b, c); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmlsl_n_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmulh.c b/test/arm/neon/qdmulh.c index 28099b33c..09bb7ca32 100644 --- a/test/arm/neon/qdmulh.c +++ b/test/arm/neon/qdmulh.c @@ -3,6 +3,48 @@ #include "test-neon.h" #include "../../../simde/arm/neon/qdmulh.h" +static int +test_simde_vqdmulhh_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a; + int16_t b; + int16_t r; + } test_vec[] = { + { -INT16_C( 7721), + -INT16_C( 4968), + INT16_C( 1170) }, + { INT16_C( 9092), + -INT16_C( 8726), + -INT16_C( 2422) }, + { INT16_C( 3840), + INT16_C( 7805), + INT16_C( 914) }, + { -INT16_C( 4379), + -INT16_C( 9567), + INT16_C( 1278) }, + { INT16_C( 2038), + INT16_C( 2751), + INT16_C( 171) }, + { INT16_C( 7989), + -INT16_C( 7314), + -INT16_C( 1784) }, + { INT16_C( 6979), + -INT16_C( 191), + -INT16_C( 41) }, + { -INT16_C( 9609), + -INT16_C( 68), + INT16_C( 
19) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t r = simde_vqdmulhh_s16(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; +} + static int test_simde_vqdmulh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -239,6 +281,7 @@ test_simde_vqdmulhq_s32 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_s32) diff --git a/test/arm/neon/qdmulh_lane.c b/test/arm/neon/qdmulh_lane.c index 9b195d00a..f0fb3c31e 100644 --- a/test/arm/neon/qdmulh_lane.c +++ b/test/arm/neon/qdmulh_lane.c @@ -3,6 +3,83 @@ #include "test-neon.h" #include "../../../simde/arm/neon/qdmulh_lane.h" +static int +test_simde_vqdmulhh_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a; + int16_t v[4]; + int16_t r0[1]; + int16_t r1[1]; + int16_t r2[1]; + int16_t r3[1]; + } test_vec[] = { + { INT16_C( 6416), + { INT16_C( 6214), -INT16_C( 2315), INT16_C( 9709), -INT16_C( 1624)}, + { INT16_C( 1216) }, + { -INT16_C( 454) }, + { INT16_C( 1901) }, + { -INT16_C( 318) } }, + { -INT16_C( 9056), + { INT16_C( 7376), INT16_C( 9070), -INT16_C( 9545), INT16_C( 4821)}, + { -INT16_C( 2039) }, + { -INT16_C( 2507) }, + { INT16_C( 2637) }, + { -INT16_C( 1333) } }, + { INT16_C( 3256), + { -INT16_C( 3529), INT16_C( 7444), -INT16_C( 7737), -INT16_C( 2078)}, + { -INT16_C( 351) }, + { INT16_C( 739) }, + { -INT16_C( 769) }, + { -INT16_C( 207) } }, + { -INT16_C( 4075), + { INT16_C( 6455), -INT16_C( 4295), INT16_C( 2208), INT16_C( 2378)}, + { -INT16_C( 803) }, + { INT16_C( 534) }, + { -INT16_C( 275) }, + { -INT16_C( 296) } }, + { -INT16_C( 233), + { -INT16_C( 420), -INT16_C( 295), INT16_C( 8774), INT16_C( 7993)}, + { INT16_C( 2) }, + { INT16_C( 2) }, + { -INT16_C( 63) }, + { -INT16_C( 57) } }, + { INT16_C( 3967), + { INT16_C( 7619), INT16_C( 8580), -INT16_C( 5182), -INT16_C( 9998)}, + { INT16_C( 922) }, + { 
INT16_C( 1038) }, + { -INT16_C( 628) }, + { -INT16_C( 1211) } }, + { INT16_C( 9811), + { INT16_C( 2852), INT16_C( 8414), -INT16_C( 7570), -INT16_C( 260)}, + { INT16_C( 853) }, + { INT16_C( 2519) }, + { -INT16_C( 2267) }, + { -INT16_C( 78) } }, + { INT16_C( 5774), + { -INT16_C( 265), INT16_C( 5424), INT16_C( 5625), INT16_C( 8653)}, + { -INT16_C( 47) }, + { INT16_C( 955) }, + { INT16_C( 991) }, + { INT16_C( 1524) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t a = test_vec[i].a; + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + int16_t r0 = simde_vqdmulhh_lane_s16(a, v, 0); + int16_t r1 = simde_vqdmulhh_lane_s16(a, v, 1); + int16_t r2 = simde_vqdmulhh_lane_s16(a, v, 2); + int16_t r3 = simde_vqdmulhh_lane_s16(a, v, 3); + + simde_assert_equal_i16(r0, test_vec[i].r0[0]); + simde_assert_equal_i16(r1, test_vec[i].r1[0]); + simde_assert_equal_i16(r2, test_vec[i].r2[0]); + simde_assert_equal_i16(r3, test_vec[i].r3[0]); + } + + return 0; +} + static int test_simde_vqdmulh_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -335,6 +412,135 @@ test_simde_vqdmulhq_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vqdmulhh_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a; + int16_t v[8]; + int16_t r0[1]; + int16_t r1[1]; + int16_t r2[1]; + int16_t r3[1]; + int16_t r4[1]; + int16_t r5[1]; + int16_t r6[1]; + int16_t r7[1]; + } test_vec[] = { + { INT16_C( 5809), + { -INT16_C( 2772), -INT16_C( 797), -INT16_C( 4978), INT16_C( 8613), + -INT16_C( 20), INT16_C( 4225), -INT16_C( 7234), INT16_C( 1124)}, + { -INT16_C( 492) }, + { -INT16_C( 142) }, + { -INT16_C( 883) }, + { INT16_C( 1526) }, + { -INT16_C( 4) }, + { INT16_C( 748) }, + { -INT16_C( 1283) }, + { INT16_C( 199) } }, + { INT16_C( 4731), + { -INT16_C( 9548), -INT16_C( 4228), -INT16_C( 8704), -INT16_C( 7130), + INT16_C( 3047), -INT16_C( 5127), -INT16_C( 6443), -INT16_C( 3552)}, + { -INT16_C( 1379) }, + { -INT16_C( 611) }, + { -INT16_C( 1257) }, + 
{ -INT16_C( 1030) }, + { INT16_C( 439) }, + { -INT16_C( 741) }, + { -INT16_C( 931) }, + { -INT16_C( 513) } }, + { INT16_C( 6067), + { -INT16_C( 5590), INT16_C( 3122), -INT16_C( 6932), INT16_C( 6267), + -INT16_C( 9214), INT16_C( 2040), -INT16_C( 1505), -INT16_C( 6302)}, + { -INT16_C( 1035) }, + { INT16_C( 578) }, + { -INT16_C( 1284) }, + { INT16_C( 1160) }, + { -INT16_C( 1706) }, + { INT16_C( 377) }, + { -INT16_C( 279) }, + { -INT16_C( 1167) } }, + { INT16_C( 5951), + { -INT16_C( 5817), INT16_C( 2584), -INT16_C( 9319), INT16_C( 4946), + INT16_C( 7972), -INT16_C( 1620), INT16_C( 5847), INT16_C( 9934)}, + { -INT16_C( 1057) }, + { INT16_C( 469) }, + { -INT16_C( 1693) }, + { INT16_C( 898) }, + { INT16_C( 1447) }, + { -INT16_C( 295) }, + { INT16_C( 1061) }, + { INT16_C( 1804) } }, + { INT16_C( 1786), + { INT16_C( 7054), -INT16_C( 8079), -INT16_C( 4300), INT16_C( 9724), + INT16_C( 1177), INT16_C( 9725), INT16_C( 5095), -INT16_C( 9211)}, + { INT16_C( 384) }, + { -INT16_C( 441) }, + { -INT16_C( 235) }, + { INT16_C( 530) }, + { INT16_C( 64) }, + { INT16_C( 530) }, + { INT16_C( 277) }, + { -INT16_C( 503) } }, + { INT16_C( 4383), + { INT16_C( 2330), -INT16_C( 843), INT16_C( 6942), -INT16_C( 5843), + -INT16_C( 7143), -INT16_C( 5753), -INT16_C( 4465), INT16_C( 7919)}, + { INT16_C( 311) }, + { -INT16_C( 113) }, + { INT16_C( 928) }, + { -INT16_C( 782) }, + { -INT16_C( 956) }, + { -INT16_C( 770) }, + { -INT16_C( 598) }, + { INT16_C( 1059) } }, + { -INT16_C( 4333), + { INT16_C( 3054), -INT16_C( 192), -INT16_C( 9375), INT16_C( 365), + -INT16_C( 9633), -INT16_C( 8348), -INT16_C( 4002), -INT16_C( 2684)}, + { -INT16_C( 404) }, + { INT16_C( 25) }, + { INT16_C( 1239) }, + { -INT16_C( 49) }, + { INT16_C( 1273) }, + { INT16_C( 1103) }, + { INT16_C( 529) }, + { INT16_C( 354) } }, + { INT16_C( 3910), + { -INT16_C( 7968), INT16_C( 2464), INT16_C( 8236), INT16_C( 2662), + INT16_C( 9529), INT16_C( 1827), -INT16_C( 3400), -INT16_C( 6818)}, + { -INT16_C( 951) }, + { INT16_C( 294) }, + { INT16_C( 
982) }, + { INT16_C( 317) }, + { INT16_C( 1137) }, + { INT16_C( 218) }, + { -INT16_C( 406) }, + { -INT16_C( 814) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t a = test_vec[i].a; + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + int16_t r0 = simde_vqdmulhh_laneq_s16(a, v, 0); + int16_t r1 = simde_vqdmulhh_laneq_s16(a, v, 1); + int16_t r2 = simde_vqdmulhh_laneq_s16(a, v, 2); + int16_t r3 = simde_vqdmulhh_laneq_s16(a, v, 3); + int16_t r4 = simde_vqdmulhh_laneq_s16(a, v, 4); + int16_t r5 = simde_vqdmulhh_laneq_s16(a, v, 5); + int16_t r6 = simde_vqdmulhh_laneq_s16(a, v, 6); + int16_t r7 = simde_vqdmulhh_laneq_s16(a, v, 7); + + simde_assert_equal_i16(r0, test_vec[i].r0[0]); + simde_assert_equal_i16(r1, test_vec[i].r1[0]); + simde_assert_equal_i16(r2, test_vec[i].r2[0]); + simde_assert_equal_i16(r3, test_vec[i].r3[0]); + simde_assert_equal_i16(r4, test_vec[i].r4[0]); + simde_assert_equal_i16(r5, test_vec[i].r5[0]); + simde_assert_equal_i16(r6, test_vec[i].r6[0]); + simde_assert_equal_i16(r7, test_vec[i].r7[0]); + } + + return 0; +} + static int test_simde_vqdmulh_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -964,10 +1170,12 @@ test_simde_vqdmulhs_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhh_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhq_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhq_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhh_laneq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_laneq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_laneq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhq_laneq_s16) diff --git a/test/arm/neon/qdmull_high.c b/test/arm/neon/qdmull_high.c new file mode 100644 index 000000000..fec13e944 --- /dev/null +++ b/test/arm/neon/qdmull_high.c @@ -0,0 +1,116 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmull_high + +#include "test-neon.h" +#include 
"../../../simde/arm/neon/qdmull_high.h" + +static int +test_simde_vqdmull_high_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b[8]; + int32_t r[4]; + } test_vec[] = { + { { INT16_C( 3128), -INT16_C( 4196), -INT16_C( 5822), INT16_C( 1127), + INT16_C( 1638), INT16_C( 9086), INT16_C( 5866), -INT16_C( 1995) }, + { INT16_C( 9725), INT16_C( 4058), INT16_C( 2522), -INT16_C( 7272), + INT16_C( 716), INT16_C( 9606), INT16_C( 3845), INT16_C( 2733) }, + { INT32_C( 2345616), INT32_C( 174560232), INT32_C( 45109540), -INT32_C( 10904670) } }, + { { -INT16_C( 6463), INT16_C( 1731), INT16_C( 3381), -INT16_C( 8558), + -INT16_C( 3430), -INT16_C( 6657), INT16_C( 5967), -INT16_C( 7104) }, + { -INT16_C( 4185), INT16_C( 9571), INT16_C( 2575), INT16_C( 8825), + INT16_C( 7910), -INT16_C( 8648), -INT16_C( 6456), INT16_C( 9583) }, + { -INT32_C( 54262600), INT32_C( 115139472), -INT32_C( 77045904), -INT32_C( 136155264) } }, + { { -INT16_C( 9875), INT16_C( 1134), -INT16_C( 450), -INT16_C( 4418), + -INT16_C( 5943), -INT16_C( 189), INT16_C( 25), INT16_C( 2387) }, + { -INT16_C( 8269), -INT16_C( 2330), INT16_C( 6715), INT16_C( 3434), + -INT16_C( 5281), INT16_C( 1448), -INT16_C( 63), INT16_C( 4354) }, + { INT32_C( 62769966), -INT32_C( 547344), -INT32_C( 3150), INT32_C( 20785996) } }, + { { -INT16_C( 9360), INT16_C( 1022), INT16_C( 7051), -INT16_C( 7920), + INT16_C( 7496), INT16_C( 6263), -INT16_C( 1973), -INT16_C( 7869) }, + { -INT16_C( 4642), INT16_C( 6027), -INT16_C( 208), -INT16_C( 6090), + -INT16_C( 9820), INT16_C( 8085), -INT16_C( 5509), INT16_C( 6497) }, + { -INT32_C( 147221440), INT32_C( 101272710), INT32_C( 21738514), -INT32_C( 102249786) } }, + { { INT16_C( 5511), -INT16_C( 1109), -INT16_C( 1140), INT16_C( 1241), + -INT16_C( 1839), -INT16_C( 5927), INT16_C( 4021), INT16_C( 7688) }, + { -INT16_C( 3740), INT16_C( 8276), -INT16_C( 4572), -INT16_C( 6446), + -INT16_C( 6557), -INT16_C( 315), -INT16_C( 5737), INT16_C( 4260) }, + { INT32_C( 24116646), INT32_C( 
3734010), -INT32_C( 46136954), INT32_C( 65501760) } }, + { { -INT16_C( 9524), INT16_C( 7658), INT16_C( 6677), INT16_C( 8903), + -INT16_C( 7844), -INT16_C( 9541), -INT16_C( 8500), -INT16_C( 6550) }, + { INT16_C( 4616), -INT16_C( 2398), -INT16_C( 7077), -INT16_C( 4078), + INT16_C( 6803), INT16_C( 1009), -INT16_C( 3462), -INT16_C( 2642) }, + { -INT32_C( 106725464), -INT32_C( 19253738), INT32_C( 58854000), INT32_C( 34610200) } }, + { { INT16_C( 5127), -INT16_C( 743), -INT16_C( 1622), -INT16_C( 6599), + -INT16_C( 243), INT16_C( 2148), -INT16_C( 6632), -INT16_C( 4335) }, + { -INT16_C( 642), -INT16_C( 425), INT16_C( 645), INT16_C( 8310), + INT16_C( 5285), -INT16_C( 8239), INT16_C( 4776), INT16_C( 2652) }, + { -INT32_C( 2568510), -INT32_C( 35394744), -INT32_C( 63348864), -INT32_C( 22992840) } }, + { { INT16_C( 7091), -INT16_C( 2728), INT16_C( 3504), -INT16_C( 4623), + INT16_C( 4344), INT16_C( 3021), -INT16_C( 4493), INT16_C( 9085) }, + { INT16_C( 1712), -INT16_C( 3276), -INT16_C( 2887), -INT16_C( 9017), + -INT16_C( 7837), -INT16_C( 7316), -INT16_C( 4421), -INT16_C( 1137) }, + { -INT32_C( 68087856), -INT32_C( 44203272), INT32_C( 39727106), -INT32_C( 20659290) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int32x4_t r = simde_vqdmull_high_s16(a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmull_high_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t b[4]; + int64_t r[2]; + } test_vec[] = { + { { INT32_C( 6170653), -INT32_C( 5146428), -INT32_C( 7147572), INT32_C( 9293542) }, + { INT32_C( 4434909), INT32_C( 9504219), -INT32_C( 4790785), -INT32_C( 9993686) }, + { INT64_C( 68484961448040), -INT64_C( 185753481151624) } }, + { { -INT32_C( 8799211), -INT32_C( 6025890), INT32_C( 5844051), -INT32_C( 6103853) }, + { 
-INT32_C( 281199), -INT32_C( 5197963), INT32_C( 8779227), INT32_C( 8600897) }, + { INT64_C( 102612500657154), -INT64_C( 104997221912282) } }, + { { -INT32_C( 4311465), INT32_C( 2252047), INT32_C( 9518225), INT32_C( 4004059) }, + { -INT32_C( 9587310), -INT32_C( 8088276), INT32_C( 8731969), INT32_C( 2971007) }, + { INT64_C( 166225691270050), INT64_C( 23792174634826) } }, + { { INT32_C( 3888828), INT32_C( 6869826), -INT32_C( 3770734), -INT32_C( 9612939) }, + { INT32_C( 9919934), -INT32_C( 7244274), INT32_C( 7805998), -INT32_C( 2397312) }, + { -INT64_C( 58868684125064), INT64_C( 46090428039936) } }, + { { -INT32_C( 6348860), INT32_C( 6128394), INT32_C( 2500134), -INT32_C( 9234729) }, + { -INT32_C( 9455599), INT32_C( 3241977), -INT32_C( 4792881), INT32_C( 8209567) }, + { -INT64_C( 23965689492108), -INT64_C( 151626252904686) } }, + { { INT32_C( 1511352), -INT32_C( 7794834), INT32_C( 6466748), INT32_C( 2417772) }, + { INT32_C( 845133), INT32_C( 1594456), INT32_C( 9682949), -INT32_C( 6201105) }, + { INT64_C( 125234382159704), -INT64_C( 29985716076120) } }, + { { -INT32_C( 7276163), INT32_C( 5549728), -INT32_C( 6831820), INT32_C( 992115) }, + { INT32_C( 9199122), -INT32_C( 4143289), -INT32_C( 2419222), -INT32_C( 7236938) }, + { INT64_C( 33055378488080), -INT64_C( 14359749487740) } }, + { { -INT32_C( 5422339), INT32_C( 6658363), -INT32_C( 9978291), INT32_C( 4140966) }, + { INT32_C( 7413089), -INT32_C( 7374139), INT32_C( 2222880), -INT32_C( 7849937) }, + { -INT64_C( 44361086996160), -INT64_C( 65012644438284) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int64x2_t r = simde_vqdmull_high_s32(a, b); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_s32) 
+SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmull_high_lane.c b/test/arm/neon/qdmull_high_lane.c new file mode 100644 index 000000000..fc0b7372a --- /dev/null +++ b/test/arm/neon/qdmull_high_lane.c @@ -0,0 +1,254 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmull_high_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmull_high_lane.h" + +static int +test_simde_vqdmull_high_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t v[4]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT16_C( 6760), -INT16_C( 95), INT16_C( 9414), INT16_C( 1581), + INT16_C( 9531), -INT16_C( 5668), -INT16_C( 1231), -INT16_C( 8041) }, + { -INT16_C( 2089), -INT16_C( 9207), -INT16_C( 8278), -INT16_C( 2672) }, + INT8_C( 3), + { -INT32_C( 50933664), INT32_C( 30289792), INT32_C( 6578464), INT32_C( 42971104) } }, + { { INT16_C( 3445), INT16_C( 9881), -INT16_C( 2591), -INT16_C( 8857), + INT16_C( 2650), INT16_C( 5967), -INT16_C( 2644), -INT16_C( 4403) }, + { -INT16_C( 8527), -INT16_C( 1857), -INT16_C( 788), INT16_C( 8560) }, + INT8_C( 2), + { -INT32_C( 4176400), -INT32_C( 9403992), INT32_C( 4166944), INT32_C( 6939128) } }, + { { -INT16_C( 5482), INT16_C( 7922), -INT16_C( 1220), -INT16_C( 1648), + -INT16_C( 5991), INT16_C( 6334), -INT16_C( 3563), INT16_C( 8545) }, + { -INT16_C( 5750), INT16_C( 6074), INT16_C( 7152), -INT16_C( 6897) }, + INT8_C( 1), + { -INT32_C( 72778668), INT32_C( 76945432), -INT32_C( 43283324), INT32_C( 103804660) } }, + { { -INT16_C( 838), INT16_C( 5238), -INT16_C( 8736), INT16_C( 296), + INT16_C( 8407), INT16_C( 8762), -INT16_C( 6003), -INT16_C( 4085) }, + { INT16_C( 9212), INT16_C( 8531), INT16_C( 6599), INT16_C( 888) }, + INT8_C( 2), + { INT32_C( 110955586), INT32_C( 115640876), -INT32_C( 79227594), -INT32_C( 53913830) } }, + { { INT16_C( 3680), INT16_C( 8622), INT16_C( 8753), INT16_C( 5696), + INT16_C( 5471), -INT16_C( 2264), -INT16_C( 5501), -INT16_C( 6087) }, + { INT16_C( 290), 
-INT16_C( 5208), -INT16_C( 7387), INT16_C( 6990) }, + INT8_C( 3), + { INT32_C( 76484580), -INT32_C( 31650720), -INT32_C( 76903980), -INT32_C( 85096260) } }, + { { -INT16_C( 3611), INT16_C( 1897), -INT16_C( 2126), INT16_C( 6986), + INT16_C( 6474), INT16_C( 2698), INT16_C( 7044), -INT16_C( 2562) }, + { -INT16_C( 1326), INT16_C( 2049), INT16_C( 8320), -INT16_C( 3248) }, + INT8_C( 0), + { -INT32_C( 17169048), -INT32_C( 7155096), -INT32_C( 18680688), INT32_C( 6794424) } }, + { { -INT16_C( 6094), -INT16_C( 9976), INT16_C( 8334), -INT16_C( 2735), + -INT16_C( 7194), INT16_C( 7077), -INT16_C( 9240), -INT16_C( 9787) }, + { INT16_C( 9801), INT16_C( 4409), INT16_C( 4484), INT16_C( 6660) }, + INT8_C( 2), + { -INT32_C( 64515792), INT32_C( 63466536), -INT32_C( 82864320), -INT32_C( 87769816) } }, + { { -INT16_C( 5859), -INT16_C( 3555), -INT16_C( 8501), INT16_C( 5189), + -INT16_C( 7467), -INT16_C( 4387), -INT16_C( 8681), -INT16_C( 6318) }, + { INT16_C( 8507), -INT16_C( 3782), -INT16_C( 5846), INT16_C( 8906) }, + INT8_C( 2), + { INT32_C( 87304164), INT32_C( 51292804), INT32_C( 101498252), INT32_C( 73870056) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r; + + SIMDE_CONSTIFY_4_(simde_vqdmull_high_lane_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, v); + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmull_high_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t v[2]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { -INT32_C( 990609), INT32_C( 371504), INT32_C( 357144), INT32_C( 420092) }, + { -INT32_C( 173232), INT32_C( 32216) }, + INT8_C( 1), + { INT64_C( 23011502208), INT64_C( 27067367744) } }, + { { INT32_C( 996227), -INT32_C( 523222), -INT32_C( 796632), -INT32_C( 264381) }, + { INT32_C( 
249707), -INT32_C( 944623) }, + INT8_C( 0), + { -INT64_C( 397849173648), -INT64_C( 132035572734) } }, + { { -INT32_C( 45430), INT32_C( 963547), INT32_C( 504181), INT32_C( 576837) }, + { -INT32_C( 96621), INT32_C( 13130) }, + INT8_C( 0), + { -INT64_C( 97428944802), -INT64_C( 111469135554) } }, + { { -INT32_C( 708415), INT32_C( 807399), -INT32_C( 664026), -INT32_C( 253901) }, + { INT32_C( 649506), -INT32_C( 839324) }, + INT8_C( 0), + { -INT64_C( 862577742312), -INT64_C( 329820445812) } }, + { { -INT32_C( 746644), -INT32_C( 231482), -INT32_C( 966072), INT32_C( 467746) }, + { INT32_C( 448186), INT32_C( 992765) }, + INT8_C( 1), + { -INT64_C(1918164938160), INT64_C( 928723715380) } }, + { { -INT32_C( 962442), INT32_C( 79204), -INT32_C( 791215), -INT32_C( 329825) }, + { -INT32_C( 321988), -INT32_C( 285699) }, + INT8_C( 1), + { INT64_C( 452098668570), INT64_C( 188461345350) } }, + { { INT32_C( 223619), INT32_C( 857944), INT32_C( 770991), -INT32_C( 773107) }, + { INT32_C( 301470), INT32_C( 325192) }, + INT8_C( 0), + { INT64_C( 464861313540), -INT64_C( 466137134580) } }, + { { INT32_C( 320762), INT32_C( 304253), INT32_C( 101896), INT32_C( 354218) }, + { INT32_C( 22025), -INT32_C( 31237) }, + INT8_C( 0), + { INT64_C( 4488518800), INT64_C( 15603302900) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r; + + SIMDE_CONSTIFY_2_(simde_vqdmull_high_lane_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, v); + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmull_high_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t v[8]; + int8_t lane; + int32_t r[4]; + } test_vec[] = { + { { -INT16_C( 6151), INT16_C( 6904), -INT16_C( 9865), INT16_C( 7767), + INT16_C( 7866), -INT16_C( 1675), -INT16_C( 6795), INT16_C( 
1504) }, + { -INT16_C( 9543), INT16_C( 3083), -INT16_C( 5514), INT16_C( 5208), + INT16_C( 4852), INT16_C( 3875), INT16_C( 4128), INT16_C( 4648) }, + INT8_C( 7), + { INT32_C( 73122336), -INT32_C( 15570800), -INT32_C( 63166320), INT32_C( 13981184) } }, + { { -INT16_C( 9730), -INT16_C( 8790), INT16_C( 8429), INT16_C( 4708), + -INT16_C( 2554), -INT16_C( 4220), INT16_C( 9275), -INT16_C( 9174) }, + { INT16_C( 5355), -INT16_C( 8996), -INT16_C( 3289), -INT16_C( 5823), + INT16_C( 4975), -INT16_C( 9273), -INT16_C( 5946), INT16_C( 9530) }, + INT8_C( 0), + { -INT32_C( 27353340), -INT32_C( 45196200), INT32_C( 99335250), -INT32_C( 98253540) } }, + { { INT16_C( 9947), INT16_C( 2471), -INT16_C( 6285), INT16_C( 7562), + -INT16_C( 8090), INT16_C( 5268), INT16_C( 4311), -INT16_C( 5662) }, + { -INT16_C( 38), INT16_C( 252), -INT16_C( 9990), -INT16_C( 7375), + INT16_C( 82), INT16_C( 2766), -INT16_C( 7954), -INT16_C( 600) }, + INT8_C( 7), + { INT32_C( 9708000), -INT32_C( 6321600), -INT32_C( 5173200), INT32_C( 6794400) } }, + { { INT16_C( 3237), INT16_C( 5149), INT16_C( 1031), -INT16_C( 1352), + -INT16_C( 2243), -INT16_C( 959), -INT16_C( 5508), -INT16_C( 2221) }, + { INT16_C( 6518), INT16_C( 3738), -INT16_C( 9586), INT16_C( 1434), + -INT16_C( 9713), INT16_C( 4094), -INT16_C( 4925), INT16_C( 9658) }, + INT8_C( 4), + { INT32_C( 43572518), INT32_C( 18629534), INT32_C( 106998408), INT32_C( 43145146) } }, + { { INT16_C( 1366), -INT16_C( 2722), -INT16_C( 1519), -INT16_C( 4492), + -INT16_C( 2385), INT16_C( 3823), -INT16_C( 4387), -INT16_C( 6834) }, + { -INT16_C( 3277), INT16_C( 8569), -INT16_C( 1556), INT16_C( 2857), + INT16_C( 1937), INT16_C( 165), -INT16_C( 6546), INT16_C( 6797) }, + INT8_C( 4), + { -INT32_C( 9239490), INT32_C( 14810302), -INT32_C( 16995238), -INT32_C( 26474916) } }, + { { -INT16_C( 4626), -INT16_C( 3341), INT16_C( 487), -INT16_C( 3740), + INT16_C( 6564), INT16_C( 2633), INT16_C( 8508), -INT16_C( 9233) }, + { -INT16_C( 3352), INT16_C( 9743), -INT16_C( 358), INT16_C( 2662), + 
-INT16_C( 8355), -INT16_C( 4002), -INT16_C( 632), -INT16_C( 8932) }, + INT8_C( 0), + { -INT32_C( 44005056), -INT32_C( 17651632), -INT32_C( 57037632), INT32_C( 61898032) } }, + { { INT16_C( 5502), INT16_C( 8721), -INT16_C( 1735), INT16_C( 4916), + INT16_C( 7794), INT16_C( 4166), INT16_C( 5183), -INT16_C( 3690) }, + { -INT16_C( 6794), INT16_C( 882), INT16_C( 4339), INT16_C( 5349), + -INT16_C( 2173), INT16_C( 4714), -INT16_C( 1175), -INT16_C( 2584) }, + INT8_C( 3), + { INT32_C( 83380212), INT32_C( 44567868), INT32_C( 55447734), -INT32_C( 39475620) } }, + { { INT16_C( 4845), -INT16_C( 7931), INT16_C( 6178), INT16_C( 701), + INT16_C( 190), INT16_C( 9654), INT16_C( 6726), -INT16_C( 2015) }, + { INT16_C( 6141), -INT16_C( 4604), INT16_C( 840), INT16_C( 9044), + INT16_C( 416), INT16_C( 8601), -INT16_C( 4851), INT16_C( 9143) }, + INT8_C( 7), + { INT32_C( 3474340), INT32_C( 176533044), INT32_C( 122991636), -INT32_C( 36846290) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r; + + SIMDE_CONSTIFY_8_(simde_vqdmull_high_laneq_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, v); + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmull_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t v[4]; + int8_t lane; + int64_t r[2]; + } test_vec[] = { + { { INT32_C( 170394437), INT32_C( 838440752), INT32_C( 2000223965), INT32_C( 224778862) }, + { INT32_C( 1294207306), INT32_C( 797463044), INT32_C( 1281649861), INT32_C( 125061820) }, + INT8_C( 2), + { INT64_C(5127173533422237730), INT64_C(576175594476076364) } }, + { { -INT32_C( 1594036428), INT32_C( 1222292849), INT32_C( 2015861929), -INT32_C( 82090536) }, + { INT32_C( 2123083315), -INT32_C( 204875652), -INT32_C( 937168206), -INT32_C( 508936045) }, + INT8_C( 
0), + { INT64_C(8559685653607229270), -INT64_C(348570094602013680) } }, + { { INT32_C( 692200562), -INT32_C( 254752304), INT32_C( 676464785), INT32_C( 1809723204) }, + { INT32_C( 1804668600), INT32_C( 1691972474), INT32_C( 1076408359), INT32_C( 986035332) }, + INT8_C( 1), + { INT64_C(2289119591700656180), INT64_C(6124003693454173392) } }, + { { -INT32_C( 1310053903), INT32_C( 1964973717), -INT32_C( 230274986), -INT32_C( 655165622) }, + { INT32_C( 77849012), INT32_C( 523183193), INT32_C( 306349840), -INT32_C( 1459982813) }, + INT8_C( 2), + { -INT64_C(141089410234204480), -INT64_C(401419766946400960) } }, + { { -INT32_C( 167463656), -INT32_C( 1320629443), -INT32_C( 1195096316), INT32_C( 285557548) }, + { -INT32_C( 905947543), -INT32_C( 461591499), -INT32_C( 1003092768), -INT32_C( 1992120667) }, + INT8_C( 1), + { INT64_C(1103292599903635368), -INT64_C(263621873264168904) } }, + { { INT32_C( 1117388545), INT32_C( 1452962560), INT32_C( 1037242287), -INT32_C( 1218607240) }, + { INT32_C( 1611559047), -INT32_C( 218371100), -INT32_C( 469758461), -INT32_C( 2134578451) }, + INT8_C( 2), + { -INT64_C(974506680850480614), INT64_C(1144902123251715280) } }, + { { -INT32_C( 1462364218), -INT32_C( 1974598648), -INT32_C( 1177302303), INT32_C( 690147037) }, + { INT32_C( 349207586), -INT32_C( 978044017), INT32_C( 1007534029), INT32_C( 1033057756) }, + INT8_C( 1), + { INT64_C(2302906947298942302), -INT64_C(1349988360776255258) } }, + { { -INT32_C( 957553051), INT32_C( 1058397298), INT32_C( 297981031), -INT32_C( 1782179923) }, + { -INT32_C( 2100356702), INT32_C( 1207072133), INT32_C( 443917590), -INT32_C( 985658139) }, + INT8_C( 3), + { -INT64_C(587414856945522618), INT64_C(3513240292534686594) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r; + + SIMDE_CONSTIFY_4_(simde_vqdmull_high_laneq_s32, r, (HEDLEY_UNREACHABLE(), r), 
test_vec[i].lane, a, v); + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_laneq_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmull_high_n.c b/test/arm/neon/qdmull_high_n.c new file mode 100644 index 000000000..aa4bef0a4 --- /dev/null +++ b/test/arm/neon/qdmull_high_n.c @@ -0,0 +1,108 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmull_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmull_high_n.h" + +static int +test_simde_vqdmull_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b; + int32_t r[4]; + } test_vec[] = { + { { -INT16_C( 6855), INT16_C( 7703), -INT16_C( 7197), -INT16_C( 5339), + -INT16_C( 1011), -INT16_C( 1303), -INT16_C( 6813), -INT16_C( 7254) }, + -INT16_C( 678), + { INT32_C( 1370916), INT32_C( 1766868), INT32_C( 9238428), INT32_C( 9836424) } }, + { { -INT16_C( 1960), INT16_C( 1926), INT16_C( 1231), INT16_C( 5811), + -INT16_C( 6109), INT16_C( 2635), INT16_C( 4629), INT16_C( 8723) }, + INT16_C( 4639), + { -INT32_C( 56679302), INT32_C( 24447530), INT32_C( 42947862), INT32_C( 80931994) } }, + { { -INT16_C( 9194), INT16_C( 393), INT16_C( 4109), -INT16_C( 9430), + INT16_C( 9440), INT16_C( 9260), -INT16_C( 5474), INT16_C( 7193) }, + INT16_C( 8124), + { INT32_C( 153381120), INT32_C( 150456480), -INT32_C( 88941552), INT32_C( 116871864) } }, + { { -INT16_C( 2722), -INT16_C( 4769), INT16_C( 3664), INT16_C( 4842), + INT16_C( 9709), -INT16_C( 952), INT16_C( 2081), INT16_C( 2443) }, + INT16_C( 4321), + { INT32_C( 83905178), -INT32_C( 8227184), INT32_C( 17984002), INT32_C( 21112406) } }, + { { -INT16_C( 549), INT16_C( 2071), -INT16_C( 2943), -INT16_C( 6513), + INT16_C( 5234), INT16_C( 8269), 
INT16_C( 8331), -INT16_C( 3593) }, + INT16_C( 3622), + { INT32_C( 37915096), INT32_C( 59900636), INT32_C( 60349764), -INT32_C( 26027692) } }, + { { INT16_C( 1637), INT16_C( 6005), INT16_C( 7757), -INT16_C( 5037), + -INT16_C( 7412), INT16_C( 9954), INT16_C( 9621), INT16_C( 539) }, + INT16_C( 1643), + { -INT32_C( 24355832), INT32_C( 32708844), INT32_C( 31614606), INT32_C( 1771154) } }, + { { -INT16_C( 1042), -INT16_C( 8045), -INT16_C( 7997), INT16_C( 7058), + INT16_C( 3904), -INT16_C( 6847), INT16_C( 7139), INT16_C( 168) }, + INT16_C( 4556), + { INT32_C( 35573248), -INT32_C( 62389864), INT32_C( 65050568), INT32_C( 1530816) } }, + { { -INT16_C( 3460), -INT16_C( 7482), -INT16_C( 2179), -INT16_C( 7154), + -INT16_C( 6641), INT16_C( 7931), -INT16_C( 9615), INT16_C( 6387) }, + -INT16_C( 8323), + { INT32_C( 110546086), -INT32_C( 132019426), INT32_C( 160051290), -INT32_C( 106318002) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + int16_t b = test_vec[i].b; + simde_int32x4_t r = simde_vqdmull_high_n_s16(a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmull_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t b; + int64_t r[2]; + } test_vec[] = { + { { -INT32_C( 362613), -INT32_C( 267565), -INT32_C( 843693), INT32_C( 671042) }, + -INT32_C( 81635), + { INT64_C( 137749756110), -INT64_C( 109561027340) } }, + { { INT32_C( 908361), -INT32_C( 481342), -INT32_C( 749809), -INT32_C( 656516) }, + INT32_C( 208325), + { -INT64_C( 312407919850), -INT64_C( 273537391400) } }, + { { -INT32_C( 776382), INT32_C( 217778), -INT32_C( 876767), INT32_C( 470808) }, + INT32_C( 537187), + { -INT64_C( 941975668858), INT64_C( 505823874192) } }, + { { INT32_C( 339294), -INT32_C( 67580), INT32_C( 598076), -INT32_C( 845644) }, + -INT32_C( 215512), + { -INT64_C( 257785109824), INT64_C( 
364492859456) } }, + { { -INT32_C( 703782), INT32_C( 666922), INT32_C( 481702), INT32_C( 138725) }, + INT32_C( 750892), + { INT64_C( 723412356368), INT64_C( 208334985400) } }, + { { INT32_C( 793067), -INT32_C( 102694), -INT32_C( 479920), INT32_C( 869866) }, + INT32_C( 963867), + { -INT64_C( 925158101280), INT64_C( 1676870263644) } }, + { { INT32_C( 203238), INT32_C( 788885), INT32_C( 260866), INT32_C( 677937) }, + -INT32_C( 242772), + { -INT64_C( 126661921104), -INT64_C( 329168242728) } }, + { { INT32_C( 559982), INT32_C( 968419), -INT32_C( 38196), INT32_C( 286789) }, + -INT32_C( 650315), + { INT64_C( 49678863480), -INT64_C( 373006377070) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + int32_t b = test_vec[i].b; + simde_int64x2_t r = simde_vqdmull_high_n_s32(a, b); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_high_n_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmull_lane.c b/test/arm/neon/qdmull_lane.c new file mode 100644 index 000000000..a992fffa4 --- /dev/null +++ b/test/arm/neon/qdmull_lane.c @@ -0,0 +1,696 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmull_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmull_lane.h" + +static int +test_simde_vqdmullh_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[1]; + int16_t v[4]; + int32_t r0[1]; + int32_t r1[1]; + int32_t r2[1]; + int32_t r3[1]; + } test_vec[] = { + { { INT16_C( 5511) }, + { -INT16_C( 1872), -INT16_C( 3187), INT16_C( 9351), INT16_C( 5112) }, + { -INT32_C( 20633184), }, + { -INT32_C( 35127114), }, + { INT32_C( 103066722), }, + { INT32_C( 56344464), } }, + { { INT16_C( 3072) }, + { INT16_C( 3954), -INT16_C( 1030), -INT16_C( 9771), INT16_C( 3670) }, + { INT32_C( 
24293376), }, + { -INT32_C( 6328320), }, + { -INT32_C( 60033024), }, + { INT32_C( 22548480), } }, + { { -INT16_C( 4970) }, + { INT16_C( 4371), INT16_C( 532), INT16_C( 4769), -INT16_C( 6774) }, + { -INT32_C( 43447740), }, + { -INT32_C( 5288080), }, + { -INT32_C( 47403860), }, + { INT32_C( 67333560), } }, + { { -INT16_C( 7124) }, + { INT16_C( 6609), -INT16_C( 8752), INT16_C( 44), INT16_C( 1460) }, + { -INT32_C( 94165032), }, + { INT32_C( 124698496), }, + { -INT32_C( 626912), }, + { -INT32_C( 20802080), } }, + { { INT16_C( 6847) }, + { INT16_C( 1195), INT16_C( 6886), INT16_C( 1817), INT16_C( 2853) }, + { INT32_C( 16364330), }, + { INT32_C( 94296884), }, + { INT32_C( 24881998), }, + { INT32_C( 39068982), } }, + { { INT16_C( 3195) }, + { INT16_C( 9325), INT16_C( 6282), INT16_C( 466), INT16_C( 7828) }, + { INT32_C( 59586750), }, + { INT32_C( 40141980), }, + { INT32_C( 2977740), }, + { INT32_C( 50020920), } }, + { { -INT16_C( 2439) }, + { -INT16_C( 9637), -INT16_C( 1244), INT16_C( 4196), INT16_C( 1735) }, + { INT32_C( 47009286), }, + { INT32_C( 6068232), }, + { -INT32_C( 20468088), }, + { -INT32_C( 8463330), } }, + { { INT16_C( 8832) }, + { INT16_C( 8903), INT16_C( 3035), -INT16_C( 5601), -INT16_C( 3161) }, + { INT32_C( 157262592), }, + { INT32_C( 53610240), }, + { -INT32_C( 98936064), }, + { -INT32_C( 55835904), } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t a = test_vec[i].a[0]; + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + int32_t r0 = simde_vqdmullh_lane_s16(a, v, 0); + int32_t r1 = simde_vqdmullh_lane_s16(a, v, 1); + int32_t r2 = simde_vqdmullh_lane_s16(a, v, 2); + int32_t r3 = simde_vqdmullh_lane_s16(a, v, 3); + + simde_assert_equal_i32(r0, test_vec[i].r0[0]); + simde_assert_equal_i32(r1, test_vec[i].r1[0]); + simde_assert_equal_i32(r2, test_vec[i].r2[0]); + simde_assert_equal_i32(r3, test_vec[i].r3[0]); + } + + return 0; +} + +static int +test_simde_vqdmullh_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static 
const struct { + int16_t a[1]; + int16_t v[8]; + int32_t r0[1]; + int32_t r1[1]; + int32_t r2[1]; + int32_t r3[1]; + int32_t r4[1]; + int32_t r5[1]; + int32_t r6[1]; + int32_t r7[1]; + } test_vec[] = { + { { INT16_C( 376) }, + { -INT16_C( 9742), INT16_C( 8640), -INT16_C( 6706), INT16_C( 6606), + INT16_C( 2187), -INT16_C( 2059), -INT16_C( 8275), -INT16_C( 4697) }, + { -INT32_C( 7325984), }, + { INT32_C( 6497280), }, + { -INT32_C( 5042912), }, + { INT32_C( 4967712), }, + { INT32_C( 1644624), }, + { -INT32_C( 1548368), }, + { -INT32_C( 6222800), }, + { -INT32_C( 3532144), } }, + { { INT16_C( 4604) }, + { INT16_C( 3066), -INT16_C( 4532), INT16_C( 6258), INT16_C( 8484), + INT16_C( 5421), INT16_C( 5549), INT16_C( 698), INT16_C( 3341) }, + { INT32_C( 28231728), }, + { -INT32_C( 41730656), }, + { INT32_C( 57623664), }, + { INT32_C( 78120672), }, + { INT32_C( 49916568), }, + { INT32_C( 51095192), }, + { INT32_C( 6427184), }, + { INT32_C( 30763928), } }, + { { -INT16_C( 3221) }, + { -INT16_C( 849), -INT16_C( 3336), INT16_C( 8568), INT16_C( 9920), + -INT16_C( 6161), INT16_C( 3431), -INT16_C( 5355), INT16_C( 155) }, + { INT32_C( 5469258), }, + { INT32_C( 21490512), }, + { -INT32_C( 55195056), }, + { -INT32_C( 63904640), }, + { INT32_C( 39689162), }, + { -INT32_C( 22102502), }, + { INT32_C( 34496910), }, + { -INT32_C( 998510), } }, + { { -INT16_C( 8783) }, + { -INT16_C( 8915), INT16_C( 1873), -INT16_C( 2862), -INT16_C( 6808), + INT16_C( 6765), -INT16_C( 5486), INT16_C( 4240), INT16_C( 6859) }, + { INT32_C( 156600890), }, + { -INT32_C( 32901118), }, + { INT32_C( 50273892), }, + { INT32_C( 119589328), }, + { -INT32_C( 118833990), }, + { INT32_C( 96367076), }, + { -INT32_C( 74479840), }, + { -INT32_C( 120485194), } }, + { { INT16_C( 891) }, + { INT16_C( 8790), INT16_C( 725), INT16_C( 1233), INT16_C( 9529), + -INT16_C( 4616), -INT16_C( 4963), INT16_C( 449), -INT16_C( 3328) }, + { INT32_C( 15663780), }, + { INT32_C( 1291950), }, + { INT32_C( 2197206), }, + { INT32_C( 16980678), }, + 
{ -INT32_C( 8225712), }, + { -INT32_C( 8844066), }, + { INT32_C( 800118), }, + { -INT32_C( 5930496), } }, + { { -INT16_C( 5783) }, + { -INT16_C( 5468), INT16_C( 6792), INT16_C( 7131), -INT16_C( 3247), + INT16_C( 7838), -INT16_C( 4352), INT16_C( 7266), -INT16_C( 8449) }, + { INT32_C( 63242888), }, + { -INT32_C( 78556272), }, + { -INT32_C( 82477146), }, + { INT32_C( 37554802), }, + { -INT32_C( 90654308), }, + { INT32_C( 50335232), }, + { -INT32_C( 84038556), }, + { INT32_C( 97721134), } }, + { { INT16_C( 5603) }, + { -INT16_C( 5528), -INT16_C( 1564), INT16_C( 4479), -INT16_C( 3106), + -INT16_C( 7568), INT16_C( 1209), -INT16_C( 4439), INT16_C( 7278) }, + { -INT32_C( 61946768), }, + { -INT32_C( 17526184), }, + { INT32_C( 50191674), }, + { -INT32_C( 34805836), }, + { -INT32_C( 84807008), }, + { INT32_C( 13548054), }, + { -INT32_C( 49743434), }, + { INT32_C( 81557268), } }, + { { -INT16_C( 2020) }, + { -INT16_C( 1475), INT16_C( 7978), INT16_C( 9778), -INT16_C( 9731), + INT16_C( 5122), INT16_C( 669), -INT16_C( 1949), INT16_C( 7057) }, + { INT32_C( 5959000), }, + { -INT32_C( 32231120), }, + { -INT32_C( 39503120), }, + { INT32_C( 39313240), }, + { -INT32_C( 20692880), }, + { -INT32_C( 2702760), }, + { INT32_C( 7873960), }, + { -INT32_C( 28510280), } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t a = test_vec[i].a[0]; + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + int32_t r0 = simde_vqdmullh_laneq_s16(a, v, 0); + int32_t r1 = simde_vqdmullh_laneq_s16(a, v, 1); + int32_t r2 = simde_vqdmullh_laneq_s16(a, v, 2); + int32_t r3 = simde_vqdmullh_laneq_s16(a, v, 3); + int32_t r4 = simde_vqdmullh_laneq_s16(a, v, 4); + int32_t r5 = simde_vqdmullh_laneq_s16(a, v, 5); + int32_t r6 = simde_vqdmullh_laneq_s16(a, v, 6); + int32_t r7 = simde_vqdmullh_laneq_s16(a, v, 7); + + simde_assert_equal_i32(r0, test_vec[i].r0[0]); + simde_assert_equal_i32(r1, test_vec[i].r1[0]); + simde_assert_equal_i32(r2, test_vec[i].r2[0]); + 
simde_assert_equal_i32(r3, test_vec[i].r3[0]); + simde_assert_equal_i32(r4, test_vec[i].r4[0]); + simde_assert_equal_i32(r5, test_vec[i].r5[0]); + simde_assert_equal_i32(r6, test_vec[i].r6[0]); + simde_assert_equal_i32(r7, test_vec[i].r7[0]); + } + + return 0; +} + +static int +test_simde_vqdmulls_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[1]; + int32_t v[2]; + int64_t r0[1]; + int64_t r1[1]; + } test_vec[] = { + { { INT32_C( 634418) }, + { INT32_C( 173921), -INT32_C( 71264) }, + { INT64_C( 220677225956), }, + { -INT64_C( 90422328704), } }, + { { -INT32_C( 320139) }, + { -INT32_C( 651577), INT32_C( 56580) }, + { INT64_C( 417190418406), }, + { -INT64_C( 36226929240), } }, + { { INT32_C( 847605) }, + { -INT32_C( 250775), INT32_C( 839913) }, + { -INT64_C( 425116287750), }, + { INT64_C( 1423828916730), } }, + { { INT32_C( 958733) }, + { INT32_C( 723071), -INT32_C( 872660) }, + { INT64_C( 1386464058086), }, + { -INT64_C( 1673295879560), } }, + { { -INT32_C( 965204) }, + { -INT32_C( 86642), -INT32_C( 329177) }, + { INT64_C( 167254409936), }, + { INT64_C( 635445914216), } }, + { { -INT32_C( 946659) }, + { -INT32_C( 92694), INT32_C( 47468) }, + { INT64_C( 175499218692), }, + { -INT64_C( 89872018824), } }, + { { INT32_C( 127025) }, + { INT32_C( 290758), INT32_C( 248889) }, + { INT64_C( 73867069900), }, + { INT64_C( 63230250450), } }, + { { INT32_C( 718100) }, + { INT32_C( 644267), INT32_C( 216043) }, + { INT64_C( 925296265400), }, + { INT64_C( 310280956600), } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t a = test_vec[i].a[0]; + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + int64_t r0 = simde_vqdmulls_lane_s32(a, v, 0); + int64_t r1 = simde_vqdmulls_lane_s32(a, v, 1); + + simde_assert_equal_i64(r0, test_vec[i].r0[0]); + simde_assert_equal_i64(r1, test_vec[i].r1[0]); + } + + return 0; +} + +static int +test_simde_vqdmulls_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t 
a[1]; + int32_t v[4]; + int64_t r0[1]; + int64_t r1[1]; + int64_t r2[1]; + int64_t r3[1]; + } test_vec[] = { + { { -INT32_C( 659075) }, + { -INT32_C( 730909), -INT32_C( 561211), INT32_C( 160323), -INT32_C( 512210) }, + { INT64_C( 963447698350), }, + { INT64_C( 739760279650), }, + { -INT64_C( 211329762450), }, + { INT64_C( 675169611500), } }, + { { INT32_C( 204549) }, + { INT32_C( 689474), INT32_C( 474503), INT32_C( 297546), -INT32_C( 478235) }, + { INT64_C( 282062434452), }, + { INT64_C( 194118228294), }, + { INT64_C( 121725473508), }, + { -INT64_C( 195644982030), } }, + { { INT32_C( 417996) }, + { -INT32_C( 767918), INT32_C( 493932), INT32_C( 433526), -INT32_C( 400079) }, + { -INT64_C( 641973304656), }, + { INT64_C( 412923200544), }, + { INT64_C( 362424267792), }, + { -INT64_C( 334462843368), } }, + { { INT32_C( 49844) }, + { INT32_C( 345692), INT32_C( 362725), INT32_C( 925044), INT32_C( 822684) }, + { INT64_C( 34461344096), }, + { INT64_C( 36159329800), }, + { INT64_C( 92215786272), }, + { INT64_C( 82011722592), } }, + { { INT32_C( 9571) }, + { -INT32_C( 573965), -INT32_C( 304784), -INT32_C( 114966), INT32_C( 287780) }, + { -INT64_C( 10986838030), }, + { -INT64_C( 5834175328), }, + { -INT64_C( 2200679172), }, + { INT64_C( 5508684760), } }, + { { INT32_C( 569493) }, + { INT32_C( 879727), -INT32_C( 968515), INT32_C( 536749), -INT32_C( 476337) }, + { INT64_C( 1001996736822), }, + { -INT64_C( 1103125025790), }, + { INT64_C( 611349596514), }, + { -INT64_C( 542541174282), } }, + { { -INT32_C( 407646) }, + { -INT32_C( 528658), -INT32_C( 336851), -INT32_C( 658175), INT32_C( 568552) }, + { INT64_C( 431010638136), }, + { INT64_C( 274631925492), }, + { INT64_C( 536604812100), }, + { -INT64_C( 463535897184), } }, + { { -INT32_C( 639673) }, + { INT32_C( 742826), -INT32_C( 190632), INT32_C( 938444), INT32_C( 921723) }, + { -INT64_C( 950331471796), }, + { INT64_C( 243884286672), }, + { -INT64_C( 1200594577624), }, + { -INT64_C( 1179202633158), } }, + }; + + for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t a = test_vec[i].a[0]; + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + int64_t r0 = simde_vqdmulls_laneq_s32(a, v, 0); + int64_t r1 = simde_vqdmulls_laneq_s32(a, v, 1); + int64_t r2 = simde_vqdmulls_laneq_s32(a, v, 2); + int64_t r3 = simde_vqdmulls_laneq_s32(a, v, 3); + + simde_assert_equal_i64(r0, test_vec[i].r0[0]); + simde_assert_equal_i64(r1, test_vec[i].r1[0]); + simde_assert_equal_i64(r2, test_vec[i].r2[0]); + simde_assert_equal_i64(r3, test_vec[i].r3[0]); + } + + return 0; +} + +static int +test_simde_vqdmull_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t v[4]; + int32_t r0[4]; + int32_t r1[4]; + int32_t r2[4]; + int32_t r3[4]; + } test_vec[] = { + { { -INT16_C( 2243), INT16_C( 573), -INT16_C( 7559), INT16_C( 5666) }, + { -INT16_C( 1598), -INT16_C( 7039), INT16_C( 3906), -INT16_C( 7938) }, + { INT32_C( 7168628), -INT32_C( 1831308), INT32_C( 24158564), -INT32_C( 18108536) }, + { INT32_C( 31576954), -INT32_C( 8066694), INT32_C( 106415602), -INT32_C( 79765948) }, + { -INT32_C( 17522316), INT32_C( 4476276), -INT32_C( 59050908), INT32_C( 44262792) }, + { INT32_C( 35609868), -INT32_C( 9096948), INT32_C( 120006684), -INT32_C( 89953416) } }, + { { -INT16_C( 846), INT16_C( 6064), INT16_C( 4978), -INT16_C( 3986) }, + { INT16_C( 1442), -INT16_C( 6294), -INT16_C( 3742), -INT16_C( 9786) }, + { -INT32_C( 2439864), INT32_C( 17488576), INT32_C( 14356552), -INT32_C( 11495624) }, + { INT32_C( 10649448), -INT32_C( 76333632), -INT32_C( 62663064), INT32_C( 50175768) }, + { INT32_C( 6331464), -INT32_C( 45382976), -INT32_C( 37255352), INT32_C( 29831224) }, + { INT32_C( 16557912), -INT32_C( 118684608), -INT32_C( 97429416), INT32_C( 78013992) } }, + { { -INT16_C( 7513), INT16_C( 4446), -INT16_C( 6868), INT16_C( 9130) }, + { INT16_C( 2731), -INT16_C( 6525), -INT16_C( 9595), -INT16_C( 8643) }, + { -INT32_C( 41036006), INT32_C( 24284052), -INT32_C( 37513016), INT32_C( 49868060) 
}, + { INT32_C( 98044650), -INT32_C( 58020300), INT32_C( 89627400), -INT32_C( 119146500) }, + { INT32_C( 144174470), -INT32_C( 85318740), INT32_C( 131796920), -INT32_C( 175204700) }, + { INT32_C( 129869718), -INT32_C( 76853556), INT32_C( 118720248), -INT32_C( 157821180) } }, + { { -INT16_C( 7578), INT16_C( 6097), -INT16_C( 4022), INT16_C( 1187) }, + { INT16_C( 2209), INT16_C( 801), -INT16_C( 1382), INT16_C( 2762) }, + { -INT32_C( 33479604), INT32_C( 26936546), -INT32_C( 17769196), INT32_C( 5244166) }, + { -INT32_C( 12139956), INT32_C( 9767394), -INT32_C( 6443244), INT32_C( 1901574) }, + { INT32_C( 20945592), -INT32_C( 16852108), INT32_C( 11116808), -INT32_C( 3280868) }, + { -INT32_C( 41860872), INT32_C( 33679828), -INT32_C( 22217528), INT32_C( 6556988) } }, + { { -INT16_C( 4146), INT16_C( 5467), -INT16_C( 8224), INT16_C( 5701) }, + { -INT16_C( 2048), -INT16_C( 8137), INT16_C( 6699), -INT16_C( 7780) }, + { INT32_C( 16982016), -INT32_C( 22392832), INT32_C( 33685504), -INT32_C( 23351296) }, + { INT32_C( 67472004), -INT32_C( 88969958), INT32_C( 133837376), -INT32_C( 92778074) }, + { -INT32_C( 55548108), INT32_C( 73246866), -INT32_C( 110185152), INT32_C( 76381998) }, + { INT32_C( 64511760), -INT32_C( 85066520), INT32_C( 127965440), -INT32_C( 88707560) } }, + { { -INT16_C( 4260), -INT16_C( 4816), INT16_C( 9532), INT16_C( 2495) }, + { INT16_C( 3361), INT16_C( 5063), INT16_C( 1754), -INT16_C( 9626) }, + { -INT32_C( 28635720), -INT32_C( 32373152), INT32_C( 64074104), INT32_C( 16771390) }, + { -INT32_C( 43136760), -INT32_C( 48766816), INT32_C( 96521032), INT32_C( 25264370) }, + { -INT32_C( 14944080), -INT32_C( 16894528), INT32_C( 33438256), INT32_C( 8752460) }, + { INT32_C( 82013520), INT32_C( 92717632), -INT32_C( 183510064), -INT32_C( 48033740) } }, + { { -INT16_C( 2019), INT16_C( 5182), -INT16_C( 8599), -INT16_C( 1275) }, + { -INT16_C( 3364), INT16_C( 6609), -INT16_C( 3140), -INT16_C( 6139) }, + { INT32_C( 13583832), -INT32_C( 34864496), INT32_C( 57854072), INT32_C( 
8578200) }, + { -INT32_C( 26687142), INT32_C( 68495676), -INT32_C( 113661582), -INT32_C( 16852950) }, + { INT32_C( 12679320), -INT32_C( 32542960), INT32_C( 54001720), INT32_C( 8007000) }, + { INT32_C( 24789282), -INT32_C( 63624596), INT32_C( 105578522), INT32_C( 15654450) } }, + { { INT16_C( 3840), INT16_C( 1242), INT16_C( 2755), INT16_C( 8053) }, + { -INT16_C( 2358), -INT16_C( 3412), -INT16_C( 545), INT16_C( 7951) }, + { -INT32_C( 18109440), -INT32_C( 5857272), -INT32_C( 12992580), -INT32_C( 37977948) }, + { -INT32_C( 26204160), -INT32_C( 8475408), -INT32_C( 18800120), -INT32_C( 54953672) }, + { -INT32_C( 4185600), -INT32_C( 1353780), -INT32_C( 3002950), -INT32_C( 8777770) }, + { INT32_C( 61063680), INT32_C( 19750284), INT32_C( 43810010), INT32_C( 128058806) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); + simde_int32x4_t r0 = simde_vqdmull_lane_s16(a, v, 0); + simde_int32x4_t r1 = simde_vqdmull_lane_s16(a, v, 1); + simde_int32x4_t r2 = simde_vqdmull_lane_s16(a, v, 2); + simde_int32x4_t r3 = simde_vqdmull_lane_s16(a, v, 3); + + simde_test_arm_neon_assert_equal_i32x4(r0, simde_vld1q_s32(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_i32x4(r1, simde_vld1q_s32(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_i32x4(r2, simde_vld1q_s32(test_vec[i].r2)); + simde_test_arm_neon_assert_equal_i32x4(r3, simde_vld1q_s32(test_vec[i].r3)); + } + + return 0; +} + +static int +test_simde_vqdmull_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t v[2]; + int64_t r0[2]; + int64_t r1[2]; + } test_vec[] = { + { { -INT32_C( 9551495), -INT32_C( 4244894) }, + { INT32_C( 9500016), -INT32_C( 4578713) }, + { -INT64_C( 181478710647840), -INT64_C( 80653121836608) }, + { INT64_C( 87467108651870), INT64_C( 38872302682844) } }, + { { INT32_C( 624903), -INT32_C( 7769809) }, + { INT32_C( 8772996), -INT32_C( 
1316132) }, + { INT64_C( 10964543038776), -INT64_C( 136329006555528) }, + { -INT64_C( 1644909670392), INT64_C( 20452188517576) } }, + { { INT32_C( 3990539), INT32_C( 8360746) }, + { INT32_C( 3739031), INT32_C( 186790) }, + { INT64_C( 29841498055418), INT64_C( 62522176954252) }, + { INT64_C( 1490785559620), INT64_C( 3123407490680) } }, + { { INT32_C( 4527060), INT32_C( 1305553) }, + { -INT32_C( 9235343), INT32_C( 4280947) }, + { -INT64_C( 83617903763160), -INT64_C( 24114459519358) }, + { INT64_C( 38760207851640), INT64_C( 11178006397382) } }, + { { INT32_C( 2233801), INT32_C( 2119374) }, + { -INT32_C( 5830491), INT32_C( 5886598) }, + { -INT64_C( 26048313252582), -INT64_C( 24713982065268) }, + { INT64_C( 26298976997996), INT64_C( 24951805499304) } }, + { { INT32_C( 6369766), -INT32_C( 5994389) }, + { -INT32_C( 581627), INT32_C( 174889) }, + { -INT64_C( 7409655778564), INT64_C( 6972996981806) }, + { INT64_C( 2228004011948), -INT64_C( 2096705395642) } }, + { { INT32_C( 4529939), INT32_C( 8107248) }, + { -INT32_C( 8279077), INT32_C( 2553033) }, + { -INT64_C( 75007427572606), -INT64_C( 134241060900192) }, + { INT64_C( 23130167509974), INT64_C( 41396143366368) } }, + { { INT32_C( 2201602), INT32_C( 5722167) }, + { -INT32_C( 4665805), INT32_C( 4267400) }, + { -INT64_C( 20544491239220), -INT64_C( 53397030798870) }, + { INT64_C( 18790232749600), INT64_C( 48837550911600) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); + simde_int64x2_t r0 = simde_vqdmull_lane_s32(a, v, 0); + simde_int64x2_t r1 = simde_vqdmull_lane_s32(a, v, 1); + + simde_test_arm_neon_assert_equal_i64x2(r0, simde_vld1q_s64(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_i64x2(r1, simde_vld1q_s64(test_vec[i].r1)); + } + + return 0; +} + +static int +test_simde_vqdmull_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t v[8]; + 
int32_t r0[4]; + int32_t r1[4]; + int32_t r2[4]; + int32_t r3[4]; + int32_t r4[4]; + int32_t r5[4]; + int32_t r6[4]; + int32_t r7[4]; + } test_vec[] = { + { { -INT16_C( 2296), -INT16_C( 9611), -INT16_C( 2943), -INT16_C( 2915) }, + { -INT16_C( 837), INT16_C( 4146), -INT16_C( 3620), -INT16_C( 7916), + -INT16_C( 1608), -INT16_C( 1765), -INT16_C( 8137), INT16_C( 1464) }, + { INT32_C( 3843504), INT32_C( 16088814), INT32_C( 4926582), INT32_C( 4879710) }, + { -INT32_C( 19038432), -INT32_C( 79694412), -INT32_C( 24403356), -INT32_C( 24171180) }, + { INT32_C( 16623040), INT32_C( 69583640), INT32_C( 21307320), INT32_C( 21104600) }, + { INT32_C( 36350272), INT32_C( 152161352), INT32_C( 46593576), INT32_C( 46150280) }, + { INT32_C( 7383936), INT32_C( 30908976), INT32_C( 9464688), INT32_C( 9374640) }, + { INT32_C( 8104880), INT32_C( 33926830), INT32_C( 10388790), INT32_C( 10289950) }, + { INT32_C( 37365104), INT32_C( 156409414), INT32_C( 47894382), INT32_C( 47438710) }, + { -INT32_C( 6722688), -INT32_C( 28141008), -INT32_C( 8617104), -INT32_C( 8535120) } }, + { { INT16_C( 9591), -INT16_C( 4056), INT16_C( 7820), -INT16_C( 4853) }, + { -INT16_C( 8747), -INT16_C( 785), INT16_C( 4014), INT16_C( 3743), + -INT16_C( 8890), INT16_C( 4960), -INT16_C( 3299), INT16_C( 2926) }, + { -INT32_C( 167784954), INT32_C( 70955664), -INT32_C( 136803080), INT32_C( 84898382) }, + { -INT32_C( 15057870), INT32_C( 6367920), -INT32_C( 12277400), INT32_C( 7619210) }, + { INT32_C( 76996548), -INT32_C( 32561568), INT32_C( 62778960), -INT32_C( 38959884) }, + { INT32_C( 71798226), -INT32_C( 30363216), INT32_C( 58540520), -INT32_C( 36329558) }, + { -INT32_C( 170527980), INT32_C( 72115680), -INT32_C( 139039600), INT32_C( 86286340) }, + { INT32_C( 95142720), -INT32_C( 40235520), INT32_C( 77574400), -INT32_C( 48141760) }, + { -INT32_C( 63281418), INT32_C( 26761488), -INT32_C( 51596360), INT32_C( 32020094) }, + { INT32_C( 56126532), -INT32_C( 23735712), INT32_C( 45762640), -INT32_C( 28399756) } }, + { { INT16_C( 
7691), INT16_C( 6951), -INT16_C( 9747), INT16_C( 1914) }, + { -INT16_C( 5178), -INT16_C( 2026), INT16_C( 9524), INT16_C( 1021), + INT16_C( 7259), INT16_C( 3467), INT16_C( 3264), -INT16_C( 3978) }, + { -INT32_C( 79647996), -INT32_C( 71984556), INT32_C( 100939932), -INT32_C( 19821384) }, + { -INT32_C( 31163932), -INT32_C( 28165452), INT32_C( 39494844), -INT32_C( 7755528) }, + { INT32_C( 146498168), INT32_C( 132402648), -INT32_C( 185660856), INT32_C( 36457872) }, + { INT32_C( 15705022), INT32_C( 14193942), -INT32_C( 19903374), INT32_C( 3908388) }, + { INT32_C( 111657938), INT32_C( 100914618), -INT32_C( 141506946), INT32_C( 27787452) }, + { INT32_C( 53329394), INT32_C( 48198234), -INT32_C( 67585698), INT32_C( 13271676) }, + { INT32_C( 50206848), INT32_C( 45376128), -INT32_C( 63628416), INT32_C( 12494592) }, + { -INT32_C( 61189596), -INT32_C( 55302156), INT32_C( 77547132), -INT32_C( 15227784) } }, + { { -INT16_C( 4214), -INT16_C( 4132), -INT16_C( 7654), INT16_C( 3308) }, + { -INT16_C( 3137), -INT16_C( 5177), INT16_C( 8825), INT16_C( 5213), + INT16_C( 4021), -INT16_C( 1222), -INT16_C( 2054), -INT16_C( 8636) }, + { INT32_C( 26438636), INT32_C( 25924168), INT32_C( 48021196), -INT32_C( 20754392) }, + { INT32_C( 43631756), INT32_C( 42782728), INT32_C( 79249516), -INT32_C( 34251032) }, + { -INT32_C( 74377100), -INT32_C( 72929800), -INT32_C( 135093100), INT32_C( 58386200) }, + { -INT32_C( 43935164), -INT32_C( 43080232), -INT32_C( 79800604), INT32_C( 34489208) }, + { -INT32_C( 33888988), -INT32_C( 33229544), -INT32_C( 61553468), INT32_C( 26602936) }, + { INT32_C( 10299016), INT32_C( 10098608), INT32_C( 18706376), -INT32_C( 8084752) }, + { INT32_C( 17311112), INT32_C( 16974256), INT32_C( 31442632), -INT32_C( 13589264) }, + { INT32_C( 72784208), INT32_C( 71367904), INT32_C( 132199888), -INT32_C( 57135776) } }, + { { INT16_C( 8803), INT16_C( 8617), INT16_C( 9238), -INT16_C( 7751) }, + { INT16_C( 8128), INT16_C( 8019), INT16_C( 2912), -INT16_C( 8386), + -INT16_C( 4694), INT16_C( 
8185), INT16_C( 7703), -INT16_C( 8880) }, + { INT32_C( 143101568), INT32_C( 140077952), INT32_C( 150172928), -INT32_C( 126000256) }, + { INT32_C( 141182514), INT32_C( 138199446), INT32_C( 148159044), -INT32_C( 124310538) }, + { INT32_C( 51268672), INT32_C( 50185408), INT32_C( 53802112), -INT32_C( 45141824) }, + { -INT32_C( 147643916), -INT32_C( 144524324), -INT32_C( 154939736), INT32_C( 129999772) }, + { -INT32_C( 82642564), -INT32_C( 80896396), -INT32_C( 86726344), INT32_C( 72766388) }, + { INT32_C( 144105110), INT32_C( 141060290), INT32_C( 151226060), -INT32_C( 126883870) }, + { INT32_C( 135619018), INT32_C( 132753502), INT32_C( 142320628), -INT32_C( 119411906) }, + { -INT32_C( 156341280), -INT32_C( 153037920), -INT32_C( 164066880), INT32_C( 137657760) } }, + { { INT16_C( 4812), INT16_C( 2790), INT16_C( 8407), -INT16_C( 6588) }, + { -INT16_C( 227), -INT16_C( 7945), INT16_C( 7398), INT16_C( 8350), + INT16_C( 3728), INT16_C( 359), INT16_C( 2109), INT16_C( 5197) }, + { -INT32_C( 2184648), -INT32_C( 1266660), -INT32_C( 3816778), INT32_C( 2990952) }, + { -INT32_C( 76462680), -INT32_C( 44333100), -INT32_C( 133587230), INT32_C( 104683320) }, + { INT32_C( 71198352), INT32_C( 41280840), INT32_C( 124389972), -INT32_C( 97476048) }, + { INT32_C( 80360400), INT32_C( 46593000), INT32_C( 140396900), -INT32_C( 110019600) }, + { INT32_C( 35878272), INT32_C( 20802240), INT32_C( 62682592), -INT32_C( 49120128) }, + { INT32_C( 3455016), INT32_C( 2003220), INT32_C( 6036226), -INT32_C( 4730184) }, + { INT32_C( 20297016), INT32_C( 11768220), INT32_C( 35460726), -INT32_C( 27788184) }, + { INT32_C( 50015928), INT32_C( 28999260), INT32_C( 87382358), -INT32_C( 68475672) } }, + { { -INT16_C( 6029), -INT16_C( 5946), -INT16_C( 2972), INT16_C( 1267) }, + { -INT16_C( 3374), INT16_C( 1580), -INT16_C( 5727), -INT16_C( 2042), + INT16_C( 5896), -INT16_C( 1717), INT16_C( 3183), INT16_C( 9987) }, + { INT32_C( 40683692), INT32_C( 40123608), INT32_C( 20055056), -INT32_C( 8549716) }, + { -INT32_C( 
19051640), -INT32_C( 18789360), -INT32_C( 9391520), INT32_C( 4003720) }, + { INT32_C( 69056166), INT32_C( 68105484), INT32_C( 34041288), -INT32_C( 14512218) }, + { INT32_C( 24622436), INT32_C( 24283464), INT32_C( 12137648), -INT32_C( 5174428) }, + { -INT32_C( 71093968), -INT32_C( 70115232), -INT32_C( 35045824), INT32_C( 14940464) }, + { INT32_C( 20703586), INT32_C( 20418564), INT32_C( 10205848), -INT32_C( 4350878) }, + { -INT32_C( 38380614), -INT32_C( 37852236), -INT32_C( 18919752), INT32_C( 8065722) }, + { -INT32_C( 120423246), -INT32_C( 118765404), -INT32_C( 59362728), INT32_C( 25307058) } }, + { { INT16_C( 9052), -INT16_C( 5974), INT16_C( 257), INT16_C( 9738) }, + { -INT16_C( 2364), INT16_C( 176), -INT16_C( 3646), INT16_C( 3711), + INT16_C( 8971), -INT16_C( 7939), -INT16_C( 4436), INT16_C( 7553) }, + { -INT32_C( 42797856), INT32_C( 28245072), -INT32_C( 1215096), -INT32_C( 46041264) }, + { INT32_C( 3186304), -INT32_C( 2102848), INT32_C( 90464), INT32_C( 3427776) }, + { -INT32_C( 66007184), INT32_C( 43562408), -INT32_C( 1874044), -INT32_C( 71009496) }, + { INT32_C( 67183944), -INT32_C( 44339028), INT32_C( 1907454), INT32_C( 72275436) }, + { INT32_C( 162410984), -INT32_C( 107185508), INT32_C( 4611094), INT32_C( 174719196) }, + { -INT32_C( 143727656), INT32_C( 94855172), -INT32_C( 4080646), -INT32_C( 154619964) }, + { -INT32_C( 80309344), INT32_C( 53001328), -INT32_C( 2280104), -INT32_C( 86395536) }, + { INT32_C( 136739512), -INT32_C( 90243244), INT32_C( 3882242), INT32_C( 147102228) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); + simde_int32x4_t r0 = simde_vqdmull_laneq_s16(a, v, 0); + simde_int32x4_t r1 = simde_vqdmull_laneq_s16(a, v, 1); + simde_int32x4_t r2 = simde_vqdmull_laneq_s16(a, v, 2); + simde_int32x4_t r3 = simde_vqdmull_laneq_s16(a, v, 3); + simde_int32x4_t r4 = simde_vqdmull_laneq_s16(a, v, 4); + 
simde_int32x4_t r5 = simde_vqdmull_laneq_s16(a, v, 5); + simde_int32x4_t r6 = simde_vqdmull_laneq_s16(a, v, 6); + simde_int32x4_t r7 = simde_vqdmull_laneq_s16(a, v, 7); + + simde_test_arm_neon_assert_equal_i32x4(r0, simde_vld1q_s32(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_i32x4(r1, simde_vld1q_s32(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_i32x4(r2, simde_vld1q_s32(test_vec[i].r2)); + simde_test_arm_neon_assert_equal_i32x4(r3, simde_vld1q_s32(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i32x4(r4, simde_vld1q_s32(test_vec[i].r4)); + simde_test_arm_neon_assert_equal_i32x4(r5, simde_vld1q_s32(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i32x4(r7, simde_vld1q_s32(test_vec[i].r7)); + } + + return 0; +} + +static int +test_simde_vqdmull_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t v[4]; + int64_t r0[2]; + int64_t r1[2]; + int64_t r2[2]; + int64_t r3[2]; + } test_vec[] = { + { { -INT32_C( 740313), -INT32_C( 1309248) }, + { INT32_C( 3275217), -INT32_C( 5899901), -INT32_C( 746999), INT32_C( 8043453) }, + { -INT64_C( 4849371445842), -INT64_C( 8576142613632) }, + { INT64_C( 8735546818026), INT64_C( 15448867168896) }, + { INT64_C( 1106026141374), INT64_C( 1956013893504) }, + { -INT64_C( 11909345641578), -INT64_C( 21061749506688) } }, + { { -INT32_C( 2092703), INT32_C( 1079828) }, + { INT32_C( 8757605), INT32_C( 294912), INT32_C( 9379338), INT32_C( 9257219) }, + { -INT64_C( 36654132512630), INT64_C( 18913414183880) }, + { -INT64_C( 1234326454272), INT64_C( 636908470272) }, + { -INT64_C( 39256337541228), INT64_C( 20256143587728) }, + { -INT64_C( 38745219945914), INT64_C( 19992408556664) } }, + { { INT32_C( 3649334), INT32_C( 4046007) }, + { -INT32_C( 2938652), INT32_C( 6787869), INT32_C( 3565613), INT32_C( 8520889) }, + { -INT64_C( 21448245315536), -INT64_C( 23779613125128) }, + { INT64_C( 49542402258492), INT64_C( 
54927530978166) }, + { INT64_C( 26024225503484), INT64_C( 28852990314582) }, + { INT64_C( 62191139875852), INT64_C( 68951153080446) } }, + { { -INT32_C( 5837561), -INT32_C( 5855016) }, + { -INT32_C( 7240451), INT32_C( 6234870), INT32_C( 8338179), -INT32_C( 510704) }, + { INT64_C( 84533148760022), INT64_C( 84785912904432) }, + { -INT64_C( 72792867904140), -INT64_C( 73010527215840) }, + { -INT64_C( 97349257082838), -INT64_C( 97640342911728) }, + { INT64_C( 5962531505888), INT64_C( 5980360182528) } }, + { { -INT32_C( 8207047), INT32_C( 2298705) }, + { -INT32_C( 681158), INT32_C( 8270752), INT32_C( 3728012), -INT32_C( 5271108) }, + { INT64_C( 11180591440852), -INT64_C( 3131562600780) }, + { -INT64_C( 135756900778688), INT64_C( 38024037952320) }, + { -INT64_C( 61191939401128), INT64_C( 17139199648920) }, + { INT64_C( 86520462196152), -INT64_C( 24233444630280) } }, + { { -INT32_C( 339794), -INT32_C( 4389703) }, + { INT32_C( 5800265), -INT32_C( 2663644), INT32_C( 5291255), INT32_C( 3453411) }, + { -INT64_C( 3941790490820), -INT64_C( 50922881342590) }, + { INT64_C( 1810180498672), INT64_C( 23385212115464) }, + { -INT64_C( 3595873402940), -INT64_C( 46454075894530) }, + { -INT64_C( 2346896674668), -INT64_C( 30318897253866) } }, + { { -INT32_C( 9164937), -INT32_C( 3763377) }, + { -INT32_C( 550606), -INT32_C( 8091428), INT32_C( 2921370), INT32_C( 4595120) }, + { INT64_C( 10092538603644), INT64_C( 4144275912924) }, + { INT64_C( 148314855720072), INT64_C( 60902188064712) }, + { -INT64_C( 53548344007380), -INT64_C( 21988433332980) }, + { -INT64_C( 84227970614880), -INT64_C( 34586337840480) } }, + { { -INT32_C( 6331007), INT32_C( 2058935) }, + { -INT32_C( 1419668), INT32_C( 9806776), INT32_C( 973805), INT32_C( 167524) }, + { INT64_C( 17975856091352), -INT64_C( 5846008267160) }, + { -INT64_C( 124173535006864), INT64_C( 40383028687120) }, + { -INT64_C( 12330332543270), INT64_C( 4010002395350) }, + { -INT64_C( 2121191233336), INT64_C( 689842053880) } }, + }; + + for (size_t i = 0 ; i 
< (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); + simde_int64x2_t r0 = simde_vqdmull_laneq_s32(a, v, 0); + simde_int64x2_t r1 = simde_vqdmull_laneq_s32(a, v, 1); + simde_int64x2_t r2 = simde_vqdmull_laneq_s32(a, v, 2); + simde_int64x2_t r3 = simde_vqdmull_laneq_s32(a, v, 3); + + simde_test_arm_neon_assert_equal_i64x2(r0, simde_vld1q_s64(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_i64x2(r1, simde_vld1q_s64(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_i64x2(r2, simde_vld1q_s64(test_vec[i].r2)); + simde_test_arm_neon_assert_equal_i64x2(r3, simde_vld1q_s64(test_vec[i].r3)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmullh_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulls_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmullh_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulls_laneq_s32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_lane_s32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_laneq_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qdmull_n.c b/test/arm/neon/qdmull_n.c new file mode 100644 index 000000000..68ae28a12 --- /dev/null +++ b/test/arm/neon/qdmull_n.c @@ -0,0 +1,100 @@ +#define SIMDE_TEST_ARM_NEON_INSN qdmull_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qdmull_n.h" + +static int +test_simde_vqdmull_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t b; + int32_t r[4]; + } test_vec[] = { + { { INT16_C( 6624), -INT16_C( 9205), -INT16_C( 5109), -INT16_C( 284) }, + INT16_C( 5819), + { INT32_C( 77090112), -INT32_C( 107127790), -INT32_C( 59458542), -INT32_C( 3305192) } }, + { { -INT16_C( 6673), -INT16_C( 4381), -INT16_C( 3216), INT16_C( 8408) }, + INT16_C( 8183), + { -INT32_C( 109210318), -INT32_C( 71699446), -INT32_C( 52633056), INT32_C( 
137605328) } }, + { { INT16_C( 3114), INT16_C( 6953), INT16_C( 9387), -INT16_C( 2558) }, + -INT16_C( 8943), + { -INT32_C( 55697004), -INT32_C( 124361358), -INT32_C( 167895882), INT32_C( 45752388) } }, + { { INT16_C( 3872), -INT16_C( 8647), INT16_C( 4643), INT16_C( 6634) }, + -INT16_C( 7367), + { -INT32_C( 57050048), INT32_C( 127404898), -INT32_C( 68409962), -INT32_C( 97745356) } }, + { { INT16_C( 5569), INT16_C( 8610), INT16_C( 4174), INT16_C( 1241) }, + INT16_C( 4261), + { INT32_C( 47459018), INT32_C( 73374420), INT32_C( 35570828), INT32_C( 10575802) } }, + { { -INT16_C( 4157), INT16_C( 7246), -INT16_C( 8418), INT16_C( 2893) }, + -INT16_C( 3990), + { INT32_C( 33172860), -INT32_C( 57823080), INT32_C( 67175640), -INT32_C( 23086140) } }, + { { INT16_C( 8394), -INT16_C( 4925), INT16_C( 9425), INT16_C( 6873) }, + INT16_C( 2131), + { INT32_C( 35775228), -INT32_C( 20990350), INT32_C( 40169350), INT32_C( 29292726) } }, + { { -INT16_C( 7623), -INT16_C( 8682), INT16_C( 9341), -INT16_C( 7120) }, + INT16_C( 6046), + { -INT32_C( 92177316), -INT32_C( 104982744), INT32_C( 112951372), -INT32_C( 86095040) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + int16_t b = test_vec[i].b; + simde_int32x4_t r = simde_vqdmull_n_s16(a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqdmull_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t b; + int64_t r[2]; + } test_vec[] = { + { { INT32_C( 19666), -INT32_C( 932172) }, + INT32_C( 274385), + { INT64_C( 10792110820), -INT64_C( 511548028440) } }, + { { -INT32_C( 6416), -INT32_C( 41797) }, + INT32_C( 498878), + { -INT64_C( 6401602496), -INT64_C( 41703207532) } }, + { { INT32_C( 89019), -INT32_C( 240137) }, + INT32_C( 171421), + { INT64_C( 30519451998), -INT64_C( 82329049354) } }, + { { INT32_C( 280588), -INT32_C( 63104) }, + -INT32_C( 
831837), + { -INT64_C( 466806960312), INT64_C( 104984484096) } }, + { { INT32_C( 203110), INT32_C( 487428) }, + INT32_C( 670677), + { INT64_C( 272442410940), INT64_C( 653813497512) } }, + { { -INT32_C( 25065), -INT32_C( 686529) }, + -INT32_C( 336578), + { INT64_C( 16872655140), INT64_C( 462141115524) } }, + { { INT32_C( 627469), INT32_C( 977133) }, + INT32_C( 708625), + { INT64_C( 889280440250), INT64_C( 1384841744250) } }, + { { INT32_C( 758643), -INT32_C( 94691) }, + -INT32_C( 850570), + { -INT64_C( 1290557953020), INT64_C( 161082647740) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + int32_t b = test_vec[i].b; + simde_int64x2_t r = simde_vqdmull_n_s32(a, b); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_n_s32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/reinterpret.c b/test/arm/neon/reinterpret.c index 6f4a7af62..fe0c92ac7 100644 --- a/test/arm/neon/reinterpret.c +++ b/test/arm/neon/reinterpret.c @@ -5640,6 +5640,33 @@ test_simde_vreinterpret_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { return 0; } +static int +test_simde_vreinterpret_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), 
SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint64x1_t r = simde_vreinterpret_u64_f16(a); + simde_uint64x1_private r_ = simde_uint64x1_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; +} + static int test_simde_vreinterpret_u64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { @@ -7460,36 +7487,24 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_f64) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_f32) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s32) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s64) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s8) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_u32) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_u64) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_u8) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_f32) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_s16) - 
SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_s32) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_s64) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_s8) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_u32) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_u64) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_u8) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/test-neon.h b/test/arm/neon/test-neon.h index f6e9fc949..345f61fc2 100644 --- a/test/arm/neon/test-neon.h +++ b/test/arm/neon/test-neon.h @@ -162,6 +162,22 @@ HEDLEY_DIAGNOSTIC_POP && simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(a1_[0]), a1_, b1_, filename, line, astr, bstr); \ } +#define SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ + static int \ + simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x2_(simde_##NT a, simde_##NT b, ET slop, \ + const char* filename, int line, const char* astr, const char* bstr) { \ + SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ + SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ + \ + simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ + \ + return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr); \ + } \ + #define SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ 
simde_test_arm_neon_random_##symbol_identifier##x##element_count##x2(ET min, ET max) { \ @@ -194,20 +210,7 @@ HEDLEY_DIAGNOSTIC_POP } \ } \ \ - static int \ - simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x2_(simde_##NT a, simde_##NT b, ET slop, \ - const char* filename, int line, const char* astr, const char* bstr) { \ - SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ - SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ - \ - simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ - \ - return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr); \ - } + SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ #if !defined(SIMDE_BUG_INTEL_857088) HEDLEY_DIAGNOSTIC_PUSH @@ -234,6 +237,13 @@ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint32x4x2_t, uint32_ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint64x2x2_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x2_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_float64_t, simde_float64, 2, q, f64) +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 +SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float16x4x2_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float16x8x2_t, 
simde_float16_t, simde_float16, 8, q, f16) +#else +SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x4x2_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x8x2_t, simde_float16_t, simde_float16, 8, q, f16) +#endif HEDLEY_DIAGNOSTIC_POP #endif @@ -289,6 +299,26 @@ HEDLEY_DIAGNOSTIC_POP && simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(a2_[0]), a2_, b2_, filename, line, astr, bstr); \ } +#define SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ + static int \ + simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x3_(simde_##NT a, simde_##NT b, ET slop, \ + const char* filename, int line, const char* astr, const char* bstr) { \ + SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ + SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ + SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ + \ + simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ + simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ + \ + return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, 
bstr); \ + } \ + #define SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x3(ET min, ET max) { \ @@ -327,24 +357,7 @@ HEDLEY_DIAGNOSTIC_POP } \ } \ \ - static int \ - simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x3_(simde_##NT a, simde_##NT b, ET slop, \ - const char* filename, int line, const char* astr, const char* bstr) { \ - SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ - SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ - SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ - \ - simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ - simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ - \ - return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr); \ - } + SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ #if !defined(SIMDE_BUG_INTEL_857088) HEDLEY_DIAGNOSTIC_PUSH @@ -371,6 +384,13 @@ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_INT_TYPE_FUNCS_( uint32x4x3_t, uint32_ 
SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_INT_TYPE_FUNCS_( uint64x2x3_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x3_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x3_t, simde_float64_t, simde_float64, 2, q, f64) +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 +SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float16x4x3_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float16x8x3_t, simde_float16_t, simde_float16, 8, q, f16) +#else +SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x4x3_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x8x3_t, simde_float16_t, simde_float16, 8, q, f16) +#endif HEDLEY_DIAGNOSTIC_POP #endif @@ -433,6 +453,30 @@ HEDLEY_DIAGNOSTIC_POP && simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(a3_[0]), a3_, b3_, filename, line, astr, bstr); \ } +#define SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ + static int \ + simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x4_(simde_##NT a, simde_##NT b, ET slop, \ + const char* filename, int line, const char* astr, const char* bstr) { \ + SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ + SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ + SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ + SET a3_[sizeof(a.val[3]) / sizeof(ET)], b3_[sizeof(b.val[3]) / sizeof(ET)]; \ + \ + simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ + 
simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ + simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ + simde_vst1##modifier##_##symbol_identifier(a3_, a.val[3]); \ + simde_vst1##modifier##_##symbol_identifier(b3_, b.val[3]); \ + \ + return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a3_), HEDLEY_REINTERPRET_CAST(SET*, b3_), slop, filename, line, astr, bstr); \ + } \ + #define SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x4(ET min, ET max) { \ @@ -477,28 +521,7 @@ HEDLEY_DIAGNOSTIC_POP } \ } \ \ - static int \ - simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x4_(simde_##NT a, simde_##NT b, ET slop, \ - const char* filename, int line, const char* astr, const char* bstr) { \ - SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ - SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ - SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ - SET a3_[sizeof(a.val[3]) / sizeof(ET)], b3_[sizeof(b.val[3]) / sizeof(ET)]; \ - \ - simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ 
- simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ - simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ - simde_vst1##modifier##_##symbol_identifier(a3_, a.val[3]); \ - simde_vst1##modifier##_##symbol_identifier(b3_, b.val[3]); \ - \ - return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a3_), HEDLEY_REINTERPRET_CAST(SET*, b3_), slop, filename, line, astr, bstr); \ - } + SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ #if !defined(SIMDE_BUG_INTEL_857088) HEDLEY_DIAGNOSTIC_PUSH @@ -525,6 +548,13 @@ SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_INT_TYPE_FUNCS_( uint32x4x4_t, uint32_ SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_INT_TYPE_FUNCS_( uint64x2x4_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x4_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x4_t, simde_float64_t, simde_float64, 2, q, f64) +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 +SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float16x4x4_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float16x8x4_t, simde_float16_t, simde_float16, 8, q, f16) +#else 
+SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x4x4_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x8x4_t, simde_float16_t, simde_float16, 8, q, f16) +#endif HEDLEY_DIAGNOSTIC_POP #endif