From 78593bee5275eb2dbe04ce51f011f08cc701d394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 17:37:30 +0800 Subject: [PATCH 01/29] [Neon] Add vcadd_rot270_f{16/32} and vcaddq_rot270_f{16/32/64} --- simde/arm/neon/cadd_rot270.h | 198 +++++++++++++++++++++ test/arm/neon/cadd_rot270.c | 329 +++++++++++++++++++++++++++++++++++ 2 files changed, 527 insertions(+) create mode 100644 simde/arm/neon/cadd_rot270.h create mode 100644 test/arm/neon/cadd_rot270.c diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h new file mode 100644 index 000000000..0ccbec6c0 --- /dev/null +++ b/simde/arm/neon/cadd_rot270.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2023 Chi-Wei Chu +*/ + +#if !defined(SIMDE_ARM_NEON_CADD_ROT270_H) +#define SIMDE_ARM_NEON_CADD_ROT270_H + +#include "types.h" +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcadd_rot270_f16(a, b); + #else + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcadd_rot270_f16 + #define vcadd_rot270_f16(a, b) simde_vcadd_rot270_f16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcaddq_rot270_f16(a, b); + #else + simde_float16x8_private + r_ , + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); + r_.values = 
b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaddq_rot270_f16 + #define vcaddq_rot270_f16(a, b) simde_vcaddq_rot270_f16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcadd_rot270_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcadd_rot270_f32(a, b); + #else + simde_float32x2_private + r_ , + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcadd_rot270_f32 + #define vcadd_rot270_f32(a, b) simde_vcadd_rot270_f32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcaddq_rot270_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcaddq_rot270_f32(a, b); + #else + 
simde_float32x4_private + r_ , + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaddq_rot270_f32 + #define vcaddq_rot270_f32(a, b) simde_vcaddq_rot270_f32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcaddq_rot270_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcaddq_rot270_f64(a, b); + #else + simde_float64x2_private + r_ , + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaddq_rot270_f64 + #define vcaddq_rot270_f64(a, b) simde_vcaddq_rot270_f64(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ \ No newline at end of file diff --git a/test/arm/neon/cadd_rot270.c 
b/test/arm/neon/cadd_rot270.c new file mode 100644 index 000000000..70bbaafd0 --- /dev/null +++ b/test/arm/neon/cadd_rot270.c @@ -0,0 +1,329 @@ +#define SIMDE_TEST_ARM_NEON_INSN cadd_rot270 + +#include "test-neon.h" +#include "../../../simde/arm/neon/cadd_rot270.h" + +static int +test_simde_vcadd_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t a[4]; + simde_float16_t b[4]; + simde_float16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 1086.00), SIMDE_FLOAT16_VALUE( 962.00), SIMDE_FLOAT16_VALUE( -922.00), SIMDE_FLOAT16_VALUE( 429.00) } }, + { { SIMDE_FLOAT16_VALUE( -659.50), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00) }, + { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( -556.50), SIMDE_FLOAT16_VALUE( 194.50), SIMDE_FLOAT16_VALUE( 1382.00), SIMDE_FLOAT16_VALUE( -375.75) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( 454.50), SIMDE_FLOAT16_VALUE( -107.75), SIMDE_FLOAT16_VALUE( -67.25), SIMDE_FLOAT16_VALUE( 607.00) } }, + { { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( -582.50), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25) }, + { SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + { SIMDE_FLOAT16_VALUE( -158.50), SIMDE_FLOAT16_VALUE( -1496.00), SIMDE_FLOAT16_VALUE( -545.00), SIMDE_FLOAT16_VALUE( 778.50) 
} }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( 172.25), SIMDE_FLOAT16_VALUE( -743.00), SIMDE_FLOAT16_VALUE( -490.75), SIMDE_FLOAT16_VALUE( 971.00) } }, + { { SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50) }, + { SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + { SIMDE_FLOAT16_VALUE( 190.50), SIMDE_FLOAT16_VALUE( 1041.00), SIMDE_FLOAT16_VALUE( -1044.00), SIMDE_FLOAT16_VALUE( 1416.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -1177.00), SIMDE_FLOAT16_VALUE( -573.00), SIMDE_FLOAT16_VALUE( 1188.00), SIMDE_FLOAT16_VALUE( -813.00) } }, + { { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + { SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + { SIMDE_FLOAT16_VALUE( 22.50), SIMDE_FLOAT16_VALUE( 1161.00), SIMDE_FLOAT16_VALUE( 577.50), SIMDE_FLOAT16_VALUE( 249.25) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r = simde_vcadd_rot270_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = 
simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcadd_rot270_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaddq_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t a[8]; + simde_float16_t b[8]; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75), + SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -936.50), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + { SIMDE_FLOAT16_VALUE(75.000000), SIMDE_FLOAT16_VALUE( 9.000000), SIMDE_FLOAT16_VALUE(-330.000000), SIMDE_FLOAT16_VALUE(444.750000), + SIMDE_FLOAT16_VALUE(-379.500000), SIMDE_FLOAT16_VALUE(537.000000), SIMDE_FLOAT16_VALUE(-2.000000), SIMDE_FLOAT16_VALUE(98.500000) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00), + SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( -666.00), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + { SIMDE_FLOAT16_VALUE(-414.750000), SIMDE_FLOAT16_VALUE(-60.000000), SIMDE_FLOAT16_VALUE(320.250000), 
SIMDE_FLOAT16_VALUE(-1288.000000), + SIMDE_FLOAT16_VALUE(1193.000000), SIMDE_FLOAT16_VALUE(268.750000), SIMDE_FLOAT16_VALUE(991.000000), SIMDE_FLOAT16_VALUE(-564.500000) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50), + SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -111.25), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + { SIMDE_FLOAT16_VALUE(-747.500000), SIMDE_FLOAT16_VALUE(529.000000), SIMDE_FLOAT16_VALUE(95.000000), SIMDE_FLOAT16_VALUE(771.000000), + SIMDE_FLOAT16_VALUE(-1456.000000), SIMDE_FLOAT16_VALUE(309.500000), SIMDE_FLOAT16_VALUE(-1582.000000), SIMDE_FLOAT16_VALUE(238.750000) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50), + SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -677.50), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + { SIMDE_FLOAT16_VALUE(-29.000000), SIMDE_FLOAT16_VALUE(53.750000), SIMDE_FLOAT16_VALUE(427.250000), SIMDE_FLOAT16_VALUE(-891.000000), + SIMDE_FLOAT16_VALUE(-270.750000), SIMDE_FLOAT16_VALUE( 5.875000), SIMDE_FLOAT16_VALUE(1056.000000), SIMDE_FLOAT16_VALUE(430.250000) } }, + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { 
SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE(-438.500000), SIMDE_FLOAT16_VALUE(-711.500000), SIMDE_FLOAT16_VALUE(372.500000), SIMDE_FLOAT16_VALUE(1038.000000), + SIMDE_FLOAT16_VALUE(-1028.000000), SIMDE_FLOAT16_VALUE(-369.250000), SIMDE_FLOAT16_VALUE(-848.000000), SIMDE_FLOAT16_VALUE(900.000000) } }, + { { SIMDE_FLOAT16_VALUE( -378.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), + SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25), + SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88) }, + { SIMDE_FLOAT16_VALUE(-1352.000000), SIMDE_FLOAT16_VALUE(91.000000), SIMDE_FLOAT16_VALUE(490.750000), SIMDE_FLOAT16_VALUE(492.000000), + SIMDE_FLOAT16_VALUE(-1510.000000), SIMDE_FLOAT16_VALUE(-758.000000), SIMDE_FLOAT16_VALUE(533.000000), SIMDE_FLOAT16_VALUE(1131.000000) } }, + { { SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50), + SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 395.50), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE(192.625000), SIMDE_FLOAT16_VALUE(1297.000000), SIMDE_FLOAT16_VALUE(-367.500000), SIMDE_FLOAT16_VALUE(1320.000000), + SIMDE_FLOAT16_VALUE(-315.000000), 
SIMDE_FLOAT16_VALUE(-435.500000), SIMDE_FLOAT16_VALUE(791.000000), SIMDE_FLOAT16_VALUE(253.250000) } }, + { { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 274.50), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25), + SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50) }, + { SIMDE_FLOAT16_VALUE(728.000000), SIMDE_FLOAT16_VALUE(347.000000), SIMDE_FLOAT16_VALUE(1081.000000), SIMDE_FLOAT16_VALUE(-248.500000), + SIMDE_FLOAT16_VALUE(-645.500000), SIMDE_FLOAT16_VALUE(298.750000), SIMDE_FLOAT16_VALUE(75.500000), SIMDE_FLOAT16_VALUE(845.000000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcadd_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[2]; + simde_float32 b[2]; + simde_float32 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 863.66), SIMDE_FLOAT32_C( 828.31) }, + { 
SIMDE_FLOAT32_C( -563.51), SIMDE_FLOAT32_C( -576.51) }, + { SIMDE_FLOAT32_C(287.149963), SIMDE_FLOAT32_C(1391.820068) } }, + { { SIMDE_FLOAT32_C( -703.45), SIMDE_FLOAT32_C( 383.90) }, + { SIMDE_FLOAT32_C( -772.46), SIMDE_FLOAT32_C( 457.40) }, + { SIMDE_FLOAT32_C(-246.050018), SIMDE_FLOAT32_C(1156.359985) } }, + { { SIMDE_FLOAT32_C( 295.99), SIMDE_FLOAT32_C( 653.10) }, + { SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( 945.50) }, + { SIMDE_FLOAT32_C(1241.489990), SIMDE_FLOAT32_C(774.079956) } }, + { { SIMDE_FLOAT32_C( -280.81), SIMDE_FLOAT32_C( 631.32) }, + { SIMDE_FLOAT32_C( 688.34), SIMDE_FLOAT32_C( 191.95) }, + { SIMDE_FLOAT32_C(-88.860001), SIMDE_FLOAT32_C(-57.020020) } }, + { { SIMDE_FLOAT32_C( -522.88), SIMDE_FLOAT32_C( -323.79) }, + { SIMDE_FLOAT32_C( -887.99), SIMDE_FLOAT32_C( -283.70) }, + { SIMDE_FLOAT32_C(-806.580017), SIMDE_FLOAT32_C(564.199951) } }, + { { SIMDE_FLOAT32_C( -117.76), SIMDE_FLOAT32_C( -841.45) }, + { SIMDE_FLOAT32_C( 664.94), SIMDE_FLOAT32_C( -987.19) }, + { SIMDE_FLOAT32_C(-1104.949951), SIMDE_FLOAT32_C(-1506.390015) } }, + { { SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( -152.10) }, + { SIMDE_FLOAT32_C( 963.83), SIMDE_FLOAT32_C( 919.89) }, + { SIMDE_FLOAT32_C(277.000000), SIMDE_FLOAT32_C(-1115.930054) } }, + { { SIMDE_FLOAT32_C( 630.40), SIMDE_FLOAT32_C( -669.33) }, + { SIMDE_FLOAT32_C( 671.13), SIMDE_FLOAT32_C( 256.93) }, + { SIMDE_FLOAT32_C(887.330017), SIMDE_FLOAT32_C(-1340.459961) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r = simde_vcadd_rot270_f32(a, b); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 
1000.0f); + simde_float32x2_t r = simde_vcadd_rot270_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaddq_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + simde_float32 b[4]; + simde_float32 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -337.31), SIMDE_FLOAT32_C( -857.36), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( -617.33) }, + { SIMDE_FLOAT32_C( -439.38), SIMDE_FLOAT32_C( 245.13), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( 520.69) }, + { SIMDE_FLOAT32_C(-92.179993), SIMDE_FLOAT32_C(-417.979980), SIMDE_FLOAT32_C(855.400024), SIMDE_FLOAT32_C(-728.390015) } }, + { { SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 250.19), SIMDE_FLOAT32_C( -679.96), SIMDE_FLOAT32_C( -750.25) }, + { SIMDE_FLOAT32_C( -138.26), SIMDE_FLOAT32_C( -14.62), SIMDE_FLOAT32_C( -921.52), SIMDE_FLOAT32_C( 225.91) }, + { SIMDE_FLOAT32_C(70.869995), SIMDE_FLOAT32_C(388.450012), SIMDE_FLOAT32_C(-454.050018), SIMDE_FLOAT32_C(171.270020) } }, + { { SIMDE_FLOAT32_C( 242.83), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 297.95), SIMDE_FLOAT32_C( 105.66) }, + { SIMDE_FLOAT32_C( -722.51), SIMDE_FLOAT32_C( -802.37), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( 915.39) }, + { SIMDE_FLOAT32_C(-559.539978), SIMDE_FLOAT32_C(1591.790039), SIMDE_FLOAT32_C(1213.340088), SIMDE_FLOAT32_C(351.440002) } }, + { { SIMDE_FLOAT32_C( 54.20), SIMDE_FLOAT32_C( -928.06), SIMDE_FLOAT32_C( 362.39), SIMDE_FLOAT32_C( -936.63) }, + { SIMDE_FLOAT32_C( 185.82), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 924.66), SIMDE_FLOAT32_C( -643.82) }, + { SIMDE_FLOAT32_C(-190.229996), SIMDE_FLOAT32_C(-1113.880005), SIMDE_FLOAT32_C(-281.429993), SIMDE_FLOAT32_C(-1861.290039) } }, + { { SIMDE_FLOAT32_C( -516.92), SIMDE_FLOAT32_C( -615.16), SIMDE_FLOAT32_C( -751.52), SIMDE_FLOAT32_C( 
-974.04) }, + { SIMDE_FLOAT32_C( -144.42), SIMDE_FLOAT32_C( 338.27), SIMDE_FLOAT32_C( 704.92), SIMDE_FLOAT32_C( 116.90) }, + { SIMDE_FLOAT32_C(-178.649994), SIMDE_FLOAT32_C(-470.739990), SIMDE_FLOAT32_C(-634.619995), SIMDE_FLOAT32_C(-1678.959961) } }, + { { SIMDE_FLOAT32_C( 49.39), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -476.30), SIMDE_FLOAT32_C( 106.71) }, + { SIMDE_FLOAT32_C( -725.84), SIMDE_FLOAT32_C( -353.71), SIMDE_FLOAT32_C( 268.41), SIMDE_FLOAT32_C( 728.83) }, + { SIMDE_FLOAT32_C(-304.320007), SIMDE_FLOAT32_C(362.840027), SIMDE_FLOAT32_C(252.530029), SIMDE_FLOAT32_C(-161.700012) } }, + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87), SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + { SIMDE_FLOAT32_C(150.020020), SIMDE_FLOAT32_C(697.539978), SIMDE_FLOAT32_C(-255.500000), SIMDE_FLOAT32_C(-386.080017) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + { SIMDE_FLOAT32_C(-309.889984), SIMDE_FLOAT32_C(146.700012), SIMDE_FLOAT32_C(303.960022), SIMDE_FLOAT32_C(-1102.250000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaddq_rot270_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + simde_float64 b[2]; + simde_float64 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -30.36), SIMDE_FLOAT64_C( 631.53) }, + { SIMDE_FLOAT64_C( 850.75), SIMDE_FLOAT64_C( -263.55) }, + { SIMDE_FLOAT64_C(-293.910000), SIMDE_FLOAT64_C(-219.220000) } }, + { { SIMDE_FLOAT64_C( 139.96), SIMDE_FLOAT64_C( 859.14) }, + { SIMDE_FLOAT64_C( -834.47), SIMDE_FLOAT64_C( 216.10) }, + { SIMDE_FLOAT64_C(356.060000), SIMDE_FLOAT64_C(1693.610000) } }, + { { SIMDE_FLOAT64_C( 995.86), SIMDE_FLOAT64_C( 529.74) }, + { SIMDE_FLOAT64_C( 79.08), SIMDE_FLOAT64_C( 947.13) }, + { SIMDE_FLOAT64_C(1942.990000), SIMDE_FLOAT64_C(450.660000) } }, + { { SIMDE_FLOAT64_C( 122.02), SIMDE_FLOAT64_C( -250.00) }, + { SIMDE_FLOAT64_C( -361.82), SIMDE_FLOAT64_C( 265.24) }, + { SIMDE_FLOAT64_C(387.260000), SIMDE_FLOAT64_C(111.820000) } }, + { { SIMDE_FLOAT64_C( 275.71), SIMDE_FLOAT64_C( 2.71) }, + { SIMDE_FLOAT64_C( 99.79), SIMDE_FLOAT64_C( -137.67) }, + { SIMDE_FLOAT64_C(138.040000), SIMDE_FLOAT64_C(-97.080000) } }, + { { SIMDE_FLOAT64_C( -761.19), SIMDE_FLOAT64_C( 813.19) }, + { SIMDE_FLOAT64_C( -897.68), SIMDE_FLOAT64_C( 653.58) }, + { SIMDE_FLOAT64_C(-107.610000), SIMDE_FLOAT64_C(1710.870000) } }, + { { SIMDE_FLOAT64_C( 396.02), SIMDE_FLOAT64_C( 413.06) }, + { SIMDE_FLOAT64_C( 514.09), SIMDE_FLOAT64_C( -977.67) }, + { SIMDE_FLOAT64_C(-581.650000), SIMDE_FLOAT64_C(-101.030000) } }, + { { SIMDE_FLOAT64_C( -671.79), SIMDE_FLOAT64_C( -92.13) }, + { SIMDE_FLOAT64_C( -441.32), SIMDE_FLOAT64_C( -374.27) }, + { SIMDE_FLOAT64_C(-1046.060000), SIMDE_FLOAT64_C(349.190000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = 
simde_vld1q_f64(test_vec[i].b); + simde_float64x2_t r = simde_vcaddq_rot270_f64(a, b); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); + simde_float64x2_t r = simde_vcaddq_rot270_f64(a, b); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcadd_rot270_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcadd_rot270_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" \ No newline at end of file From 617cf05f08b08f25200c90f990707288462f91e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 17:38:26 +0800 Subject: [PATCH 02/29] [Neon] Add vcadd_rot90_f{16/32} and vcaddq_rot90_f{16/32/64} --- simde/arm/neon/cadd_rot90.h | 198 ++++++++++++++++++++++ test/arm/neon/cadd_rot90.c | 329 ++++++++++++++++++++++++++++++++++++ 2 files changed, 527 insertions(+) create mode 100644 simde/arm/neon/cadd_rot90.h create mode 100644 test/arm/neon/cadd_rot90.c diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h new file mode 100644 index 000000000..d43accd7b --- /dev/null +++ b/simde/arm/neon/cadd_rot90.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the 
rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2023 Chi-Wei Chu +*/ + +#if !defined(SIMDE_ARM_NEON_CADD_ROT90_H) +#define SIMDE_ARM_NEON_CADD_ROT90_H + +#include "types.h" +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcadd_rot90_f16(a, b); + #else + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + + 
return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcadd_rot90_f16 + #define vcadd_rot90_f16(a, b) simde_vcadd_rot90_f16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcaddq_rot90_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcaddq_rot90_f16(a, b); + #else + simde_float16x8_private + r_ , + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaddq_rot90_f16 + #define vcaddq_rot90_f16(a, b) simde_vcaddq_rot90_f16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcadd_rot90_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcadd_rot90_f32(a, b); + #else + simde_float32x2_private + r_ , + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values = b_.values + a_.values; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcadd_rot90_f32 + #define vcadd_rot90_f32(a, b) simde_vcadd_rot90_f32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcaddq_rot90_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcaddq_rot90_f32(a, b); + #else + simde_float32x4_private + r_ , + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcaddq_rot90_f32 + #define vcaddq_rot90_f32(a, b) simde_vcaddq_rot90_f32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcaddq_rot90_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcaddq_rot90_f64(a, b); + #else + simde_float64x2_private + r_ , + a_ = 
simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +/* The f64 variant is selected natively under SIMDE_ARM_NEON_A64V8_NATIVE, so its alias is keyed on the matching A64V8 switch. */ +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaddq_rot90_f64 + #define vcaddq_rot90_f64(a, b) simde_vcaddq_rot90_f64(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ \ No newline at end of file diff --git a/test/arm/neon/cadd_rot90.c b/test/arm/neon/cadd_rot90.c new file mode 100644 index 000000000..a9f7d5f13 --- /dev/null +++ b/test/arm/neon/cadd_rot90.c @@ -0,0 +1,329 @@ +#define SIMDE_TEST_ARM_NEON_INSN cadd_rot90 + +#include "test-neon.h" +#include "../../../simde/arm/neon/cadd_rot90.h" + +static int +test_simde_vcadd_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t a[4]; + simde_float16_t b[4]; + simde_float16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 547.00), SIMDE_FLOAT16_VALUE( 585.00), SIMDE_FLOAT16_VALUE( 166.25), SIMDE_FLOAT16_VALUE( 660.00) } }, + { { SIMDE_FLOAT16_VALUE( -659.50), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00) }, + { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( -762.50), SIMDE_FLOAT16_VALUE(
1654.00), SIMDE_FLOAT16_VALUE( -414.50), SIMDE_FLOAT16_VALUE( -1138.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( 131.62), SIMDE_FLOAT16_VALUE( -120.38), SIMDE_FLOAT16_VALUE( -208.00), SIMDE_FLOAT16_VALUE( 838.00) } }, + { { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( -582.50), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25) }, + { SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + { SIMDE_FLOAT16_VALUE( -1640.00), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( 1263.00), SIMDE_FLOAT16_VALUE( -94.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -843.00), SIMDE_FLOAT16_VALUE( 238.12), SIMDE_FLOAT16_VALUE( 945.00), SIMDE_FLOAT16_VALUE( -988.00) } }, + { { SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50) }, + { SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + { SIMDE_FLOAT16_VALUE( 806.50), SIMDE_FLOAT16_VALUE( -629.00), SIMDE_FLOAT16_VALUE( -550.50), SIMDE_FLOAT16_VALUE( 67.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( 755.00), SIMDE_FLOAT16_VALUE( -288.75), SIMDE_FLOAT16_VALUE( 263.00), SIMDE_FLOAT16_VALUE( 
-1076.00) } }, + { { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + { SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + { SIMDE_FLOAT16_VALUE( -744.50), SIMDE_FLOAT16_VALUE( -790.00), SIMDE_FLOAT16_VALUE( -456.50), SIMDE_FLOAT16_VALUE( 1250.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r = simde_vcadd_rot90_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcadd_rot90_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaddq_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t a[8]; + simde_float16_t b[8]; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75), + SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -936.50), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + { SIMDE_FLOAT16_VALUE(1005.000000), 
SIMDE_FLOAT16_VALUE(-1864.000000), SIMDE_FLOAT16_VALUE(366.000000), SIMDE_FLOAT16_VALUE(236.750000), + SIMDE_FLOAT16_VALUE(-416.000000), SIMDE_FLOAT16_VALUE(-204.625000), SIMDE_FLOAT16_VALUE(1710.000000), SIMDE_FLOAT16_VALUE(-320.000000) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00), + SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( -666.00), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + { SIMDE_FLOAT16_VALUE(917.000000), SIMDE_FLOAT16_VALUE(1107.000000), SIMDE_FLOAT16_VALUE(-444.750000), SIMDE_FLOAT16_VALUE(463.500000), + SIMDE_FLOAT16_VALUE(444.000000), SIMDE_FLOAT16_VALUE(-801.000000), SIMDE_FLOAT16_VALUE(126.000000), SIMDE_FLOAT16_VALUE(-385.000000) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50), + SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -111.25), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + { SIMDE_FLOAT16_VALUE(913.500000), SIMDE_FLOAT16_VALUE(306.750000), SIMDE_FLOAT16_VALUE(-1846.000000), SIMDE_FLOAT16_VALUE(890.000000), + SIMDE_FLOAT16_VALUE(189.000000), SIMDE_FLOAT16_VALUE(1354.000000), SIMDE_FLOAT16_VALUE(-197.500000), SIMDE_FLOAT16_VALUE(1061.000000) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50), + SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), 
SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -677.50), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + { SIMDE_FLOAT16_VALUE(1326.000000), SIMDE_FLOAT16_VALUE(717.000000), SIMDE_FLOAT16_VALUE(531.500000), SIMDE_FLOAT16_VALUE(-696.000000), + SIMDE_FLOAT16_VALUE(-1210.000000), SIMDE_FLOAT16_VALUE(484.000000), SIMDE_FLOAT16_VALUE(-598.000000), SIMDE_FLOAT16_VALUE(-657.000000) } }, + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE(891.500000), SIMDE_FLOAT16_VALUE(1069.000000), SIMDE_FLOAT16_VALUE(-9.125000), SIMDE_FLOAT16_VALUE(-197.500000), + SIMDE_FLOAT16_VALUE(-370.000000), SIMDE_FLOAT16_VALUE(67.000000), SIMDE_FLOAT16_VALUE(71.750000), SIMDE_FLOAT16_VALUE(-198.750000) } }, + { { SIMDE_FLOAT16_VALUE( -378.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), + SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25), + SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88) }, + { SIMDE_FLOAT16_VALUE(596.000000), SIMDE_FLOAT16_VALUE(-1482.000000), SIMDE_FLOAT16_VALUE(-247.750000), 
SIMDE_FLOAT16_VALUE(-649.000000), + SIMDE_FLOAT16_VALUE(-229.500000), SIMDE_FLOAT16_VALUE(-662.000000), SIMDE_FLOAT16_VALUE(381.500000), SIMDE_FLOAT16_VALUE(27.000000) } }, + { { SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50), + SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 395.50), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE(512.000000), SIMDE_FLOAT16_VALUE(-649.000000), SIMDE_FLOAT16_VALUE(1338.000000), SIMDE_FLOAT16_VALUE(566.500000), + SIMDE_FLOAT16_VALUE(-1637.000000), SIMDE_FLOAT16_VALUE(1226.000000), SIMDE_FLOAT16_VALUE(-992.000000), SIMDE_FLOAT16_VALUE(-1181.000000) } }, + { { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 274.50), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25), + SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50) }, + { SIMDE_FLOAT16_VALUE(343.000000), SIMDE_FLOAT16_VALUE(896.000000), SIMDE_FLOAT16_VALUE(76.250000), SIMDE_FLOAT16_VALUE(1110.000000), + SIMDE_FLOAT16_VALUE(-503.750000), SIMDE_FLOAT16_VALUE(-938.000000), SIMDE_FLOAT16_VALUE(-1546.000000), SIMDE_FLOAT16_VALUE(-348.000000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vcaddq_rot90_f16(a, b); + + 
simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcaddq_rot90_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcadd_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[2]; + simde_float32 b[2]; + simde_float32 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 863.66), SIMDE_FLOAT32_C( 828.31) }, + { SIMDE_FLOAT32_C( -563.51), SIMDE_FLOAT32_C( -576.51) }, + { SIMDE_FLOAT32_C(1440.169922), SIMDE_FLOAT32_C(264.799988) } }, + { { SIMDE_FLOAT32_C( -703.45), SIMDE_FLOAT32_C( 383.90) }, + { SIMDE_FLOAT32_C( -772.46), SIMDE_FLOAT32_C( 457.40) }, + { SIMDE_FLOAT32_C(-1160.849976), SIMDE_FLOAT32_C(-388.560028) } }, + { { SIMDE_FLOAT32_C( 295.99), SIMDE_FLOAT32_C( 653.10) }, + { SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( 945.50) }, + { SIMDE_FLOAT32_C(-649.510010), SIMDE_FLOAT32_C(532.119995) } }, + { { SIMDE_FLOAT32_C( -280.81), SIMDE_FLOAT32_C( 631.32) }, + { SIMDE_FLOAT32_C( 688.34), SIMDE_FLOAT32_C( 191.95) }, + { SIMDE_FLOAT32_C(-472.760010), SIMDE_FLOAT32_C(1319.660034) } }, + { { SIMDE_FLOAT32_C( -522.88), SIMDE_FLOAT32_C( -323.79) }, + { SIMDE_FLOAT32_C( -887.99), SIMDE_FLOAT32_C( -283.70) }, + { SIMDE_FLOAT32_C(-239.179993), SIMDE_FLOAT32_C(-1211.780029) } }, + { { SIMDE_FLOAT32_C( -117.76), SIMDE_FLOAT32_C( -841.45) }, + { SIMDE_FLOAT32_C( 664.94), SIMDE_FLOAT32_C( -987.19) }, + { SIMDE_FLOAT32_C(869.429993), SIMDE_FLOAT32_C(-176.510010) } }, + { { SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( -152.10) }, + { SIMDE_FLOAT32_C( 
963.83), SIMDE_FLOAT32_C( 919.89) }, + { SIMDE_FLOAT32_C(-1562.780029), SIMDE_FLOAT32_C(811.729980) } }, + { { SIMDE_FLOAT32_C( 630.40), SIMDE_FLOAT32_C( -669.33) }, + { SIMDE_FLOAT32_C( 671.13), SIMDE_FLOAT32_C( 256.93) }, + { SIMDE_FLOAT32_C(373.470032), SIMDE_FLOAT32_C( 1.799988) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r = simde_vcadd_rot90_f32(a, b); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + } + + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcadd_rot90_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaddq_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + simde_float32 b[4]; + simde_float32 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -337.31), SIMDE_FLOAT32_C( -857.36), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( -617.33) }, + { SIMDE_FLOAT32_C( -439.38), SIMDE_FLOAT32_C( 245.13), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( 520.69) }, + { SIMDE_FLOAT32_C(-582.440002), SIMDE_FLOAT32_C(-1296.739990), SIMDE_FLOAT32_C(-185.980011), SIMDE_FLOAT32_C(-506.270020) } }, + { { SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 250.19), SIMDE_FLOAT32_C( -679.96), SIMDE_FLOAT32_C( -750.25) }, + { SIMDE_FLOAT32_C( -138.26), SIMDE_FLOAT32_C( -14.62), SIMDE_FLOAT32_C( -921.52), SIMDE_FLOAT32_C( 225.91) }, + { SIMDE_FLOAT32_C(100.110001), SIMDE_FLOAT32_C(111.930008), SIMDE_FLOAT32_C(-905.869995), 
SIMDE_FLOAT32_C(-1671.770020) } }, + { { SIMDE_FLOAT32_C( 242.83), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 297.95), SIMDE_FLOAT32_C( 105.66) }, + { SIMDE_FLOAT32_C( -722.51), SIMDE_FLOAT32_C( -802.37), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( 915.39) }, + { SIMDE_FLOAT32_C(1045.199951), SIMDE_FLOAT32_C(146.770020), SIMDE_FLOAT32_C(-617.440002), SIMDE_FLOAT32_C(-140.119995) } }, + { { SIMDE_FLOAT32_C( 54.20), SIMDE_FLOAT32_C( -928.06), SIMDE_FLOAT32_C( 362.39), SIMDE_FLOAT32_C( -936.63) }, + { SIMDE_FLOAT32_C( 185.82), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 924.66), SIMDE_FLOAT32_C( -643.82) }, + { SIMDE_FLOAT32_C(298.630005), SIMDE_FLOAT32_C(-742.239990), SIMDE_FLOAT32_C(1006.210022), SIMDE_FLOAT32_C(-11.970032) } }, + { { SIMDE_FLOAT32_C( -516.92), SIMDE_FLOAT32_C( -615.16), SIMDE_FLOAT32_C( -751.52), SIMDE_FLOAT32_C( -974.04) }, + { SIMDE_FLOAT32_C( -144.42), SIMDE_FLOAT32_C( 338.27), SIMDE_FLOAT32_C( 704.92), SIMDE_FLOAT32_C( 116.90) }, + { SIMDE_FLOAT32_C(-855.189941), SIMDE_FLOAT32_C(-759.579956), SIMDE_FLOAT32_C(-868.420044), SIMDE_FLOAT32_C(-269.119995) } }, + { { SIMDE_FLOAT32_C( 49.39), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -476.30), SIMDE_FLOAT32_C( 106.71) }, + { SIMDE_FLOAT32_C( -725.84), SIMDE_FLOAT32_C( -353.71), SIMDE_FLOAT32_C( 268.41), SIMDE_FLOAT32_C( 728.83) }, + { SIMDE_FLOAT32_C(403.099976), SIMDE_FLOAT32_C(-1088.840088), SIMDE_FLOAT32_C(-1205.130005), SIMDE_FLOAT32_C(375.119995) } }, + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87), SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + { SIMDE_FLOAT32_C(-1090.540039), SIMDE_FLOAT32_C(-533.799988), SIMDE_FLOAT32_C(400.039978), SIMDE_FLOAT32_C(-1599.100098) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), 
SIMDE_FLOAT32_C( 599.82) }, + { SIMDE_FLOAT32_C(-353.709991), SIMDE_FLOAT32_C(-1358.580078), SIMDE_FLOAT32_C(-895.679993), SIMDE_FLOAT32_C(551.410034) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r = simde_vcaddq_rot90_f32(a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcaddq_rot90_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcaddq_rot90_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + simde_float64 b[2]; + simde_float64 r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -30.36), SIMDE_FLOAT64_C( 631.53) }, + { SIMDE_FLOAT64_C( 850.75), SIMDE_FLOAT64_C( -263.55) }, + { SIMDE_FLOAT64_C(233.190000), SIMDE_FLOAT64_C(1482.280000) } }, + { { SIMDE_FLOAT64_C( 139.96), SIMDE_FLOAT64_C( 859.14) }, + { SIMDE_FLOAT64_C( -834.47), SIMDE_FLOAT64_C( 216.10) }, + { SIMDE_FLOAT64_C(-76.140000), SIMDE_FLOAT64_C(24.670000) } }, + { { SIMDE_FLOAT64_C( 995.86), SIMDE_FLOAT64_C( 529.74) }, + { SIMDE_FLOAT64_C( 79.08), SIMDE_FLOAT64_C( 947.13) }, + { SIMDE_FLOAT64_C(48.730000), SIMDE_FLOAT64_C(608.820000) } }, + { { SIMDE_FLOAT64_C( 122.02), SIMDE_FLOAT64_C( -250.00) }, + { SIMDE_FLOAT64_C( -361.82), SIMDE_FLOAT64_C( 265.24) }, + { SIMDE_FLOAT64_C(-143.220000), SIMDE_FLOAT64_C(-611.820000) } }, + { { SIMDE_FLOAT64_C( 275.71), SIMDE_FLOAT64_C( 2.71) }, + { 
SIMDE_FLOAT64_C( 99.79), SIMDE_FLOAT64_C( -137.67) }, + { SIMDE_FLOAT64_C(413.380000), SIMDE_FLOAT64_C(102.500000) } }, + { { SIMDE_FLOAT64_C( -761.19), SIMDE_FLOAT64_C( 813.19) }, + { SIMDE_FLOAT64_C( -897.68), SIMDE_FLOAT64_C( 653.58) }, + { SIMDE_FLOAT64_C(-1414.770000), SIMDE_FLOAT64_C(-84.490000) } }, + { { SIMDE_FLOAT64_C( 396.02), SIMDE_FLOAT64_C( 413.06) }, + { SIMDE_FLOAT64_C( 514.09), SIMDE_FLOAT64_C( -977.67) }, + { SIMDE_FLOAT64_C(1373.690000), SIMDE_FLOAT64_C(927.150000) } }, + { { SIMDE_FLOAT64_C( -671.79), SIMDE_FLOAT64_C( -92.13) }, + { SIMDE_FLOAT64_C( -441.32), SIMDE_FLOAT64_C( -374.27) }, + { SIMDE_FLOAT64_C(-297.520000), SIMDE_FLOAT64_C(-533.450000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + simde_float64x2_t r = simde_vcaddq_rot90_f64(a, b); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); + simde_float64x2_t r = simde_vcaddq_rot90_f64(a, b); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcadd_rot90_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot90_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcadd_rot90_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot90_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot90_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" \ No newline at end of file From 94cc32b31a9ced64a3a807b2da1446032a5a79b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: 
Mon, 16 Oct 2023 17:39:05 +0800 Subject: [PATCH 03/29] [Neon] Add vcmla_lane_f{16/32} and vcmla_laneq_f{16/32} and vcmlaq_lane_f{16/32} and vcmlaq_laneq_f{16/32} --- simde/arm/neon/cmla_lane.h | 316 ++++++++++++++++ test/arm/neon/cmla_lane.c | 750 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1066 insertions(+) create mode 100644 simde/arm/neon/cmla_lane.h create mode 100644 test/arm/neon/cmla_lane.c diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h new file mode 100644 index 000000000..36bf532e5 --- /dev/null +++ b/simde/arm/neon/cmla_lane.h @@ -0,0 +1,316 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2023 Chi-Wei Chu +*/ + +#if !defined(SIMDE_ARM_NEON_CMLA_LANE_H) +#define SIMDE_ARM_NEON_CMLA_LANE_H + +#include "types.h" +#include "dup_lane.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* simde_vcmla_lane_f16: the portable path adds b[lane] * a[2i] to both r[2i] and r[2i + 1]. */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1){ + simde_float16x4_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmla_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + /* Hand back `result`, the output variable given to the constify dispatch; this branch previously had no return statement. */ + return result; + + #else + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + result = simde_float16x4_from_private(r_); + return result; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_lane_f16 + #define vcmla_lane_f16(r, a, b, lane) simde_vcmla_lane_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) ||
SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmla_lane_f32(r, a, b, 0); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_lane_f32 + #define vcmla_lane_f32(r, a, b, lane) simde_vcmla_lane_f32(r, a, b, lane) +#endif + +/* simde_vcmla_laneq_f16: the portable path adds b[lane] * a[2i] to both r[2i] and r[2i + 1]. */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float16x4_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmla_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + /* Hand back `result`, the output variable given to the constify dispatch; this branch previously had no return statement. */ + return result; + #else + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] +=
b_.values[lane] * a_.values[2 * i]; + } + #endif + result = simde_float16x4_from_private(r_); + return result; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_laneq_f16 + #define vcmla_laneq_f16(r, a, b, lane) simde_vcmla_laneq_f16(r, a, b, lane) +#endif + +/* simde_vcmla_laneq_f32: the portable path adds b[lane] * a[2i] to both r[2i] and r[2i + 1]. */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmla_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + /* Hand back `result`, the output variable given to the constify dispatch; this branch previously had no return statement. */ + return result; + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); + + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + result = simde_float32x2_from_private(r_); + return result; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_laneq_f32 + #define vcmla_laneq_f32(r, a, b, lane) simde_vcmla_laneq_f32(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float16x8_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION)
|| HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmlaq_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + /* Hand back `result`, the output variable given to the constify dispatch; this branch previously had no return statement. */ + return result; + #else + simde_float16x8_private + r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane])); + + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + result = simde_float16x8_from_private(r_); + return result; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_lane_f16 + #define vcmlaq_lane_f16(r, a, b, lane) simde_vcmlaq_lane_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_lane_f32(r, a, b, 0); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); + + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { +
r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_lane_f32 + #define vcmlaq_lane_f32(r, a, b, lane) simde_vcmlaq_lane_f32(r, a, b, lane) +#endif + +/* simde_vcmlaq_laneq_f16(r, a, b, lane): returns r after adding, to both lanes of each even/odd pair, the even lane of a from that pair multiplied by one element of b. r, a and b are all float16x8; SIMDE_REQUIRE_CONSTANT_RANGE limits lane to 0..3. When the A32V8 native checks pass, the work is forwarded to vcmlaq_laneq_f16; otherwise it is emulated below. NOTE(review): the emulation multiplies by the single element b[lane]; ACLE appears to treat lane as a complex-pair index for this intrinsic, so compare against native output before relying on it. */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x8_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_4_(vcmlaq_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); /* stores into result; nothing is returned on this line */ + #else + simde_float16x8_private + r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane])); + /* b_ is b[lane] passed through simde_vdupq_n_f16 (presumably a broadcast), so b_.values[lane] in the scalar loop reads that same value for any lane in range. */ + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); /* each odd lane is meant to copy the even lane before it, mirroring the scalar loop */ + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + result = simde_float16x8_from_private(r_); + #endif + return result; /* placed after the outer #endif so the native branch returns result as well */ +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_laneq_f16 + #define vcmlaq_laneq_f16(r, a, b, lane) simde_vcmlaq_laneq_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_t result; + #if
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmlaq_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); /* stores into result; nothing is returned on this line */ + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); + /* Emulation of simde_vcmlaq_laneq_f32: b_ is b[lane] passed through simde_vdupq_n_f32 (presumably a broadcast), and each even/odd pair of r gains the even lane of a from that pair multiplied by that value. NOTE(review): ACLE appears to treat lane as a complex-pair index, whereas a single element of b is used here; compare against native output. */ + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); /* each odd lane is meant to copy the even lane before it, mirroring the scalar loop */ + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + result = simde_float32x4_from_private(r_); + #endif + return result; /* placed after the outer #endif so the native branch returns result as well */ +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_laneq_f32 + #define vcmlaq_laneq_f32(r, a, b, lane) simde_vcmlaq_laneq_f32(r, a, b, lane) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ \ No newline at end of file diff --git a/test/arm/neon/cmla_lane.c b/test/arm/neon/cmla_lane.c new file mode 100644 index 000000000..5ae9d6fba --- /dev/null +++ b/test/arm/neon/cmla_lane.c @@ -0,0 +1,750 @@ +#define SIMDE_TEST_ARM_NEON_INSN cmla_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/cmla_lane.h" +#include "../../../simde/arm/neon/dup_n.h" +/* Table-driven check of simde_vcmla_lane_f16: each entry stores 4-element r_, a and b, a lane and the expected 4-element r. With the #if 1 branch active every entry is replayed and the result compared with r; the #else branch instead emits randomly generated vectors through the simde_test write helpers and returns 1. */ +static int +test_simde_vcmla_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[4]; + simde_float16_t a[4]; + simde_float16_t b[4]; + const int lane; + simde_float16_t r[4]; + } test_vec[] = { + { + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + {
SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-7724.000000), SIMDE_FLOAT16_VALUE(-7784.000000), SIMDE_FLOAT16_VALUE(-10416.000000), SIMDE_FLOAT16_VALUE(-10352.000000) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(48000.000000), SIMDE_FLOAT16_VALUE(47392.000000), SIMDE_FLOAT16_VALUE(-22592.000000), SIMDE_FLOAT16_VALUE(-21312.000000) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-51488.000000), SIMDE_FLOAT16_VALUE(-51680.000000), SIMDE_FLOAT16_VALUE(48192.000000), SIMDE_FLOAT16_VALUE(46528.000000) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-37536.000000), SIMDE_FLOAT16_VALUE(-37824.000000), 
SIMDE_FLOAT16_VALUE(23584.000000), SIMDE_FLOAT16_VALUE(23552.000000) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-34432.000000), SIMDE_FLOAT16_VALUE(-34624.000000), SIMDE_FLOAT16_VALUE(-51520.000000), SIMDE_FLOAT16_VALUE(-51392.000000) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(243.125000), SIMDE_FLOAT16_VALUE(199.000000), SIMDE_FLOAT16_VALUE(48928.000000), SIMDE_FLOAT16_VALUE(49248.000000) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-35520.000000), SIMDE_FLOAT16_VALUE(-35968.000000), SIMDE_FLOAT16_VALUE(-9888.000000), SIMDE_FLOAT16_VALUE(-9928.000000) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), 
SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-18624.000000), SIMDE_FLOAT16_VALUE(-18464.000000), SIMDE_FLOAT16_VALUE(-13800.000000), SIMDE_FLOAT16_VALUE(-13680.000000) } } + + }; + /* Replay every entry: SIMDE_CONSTIFY_2_ receives the runtime lane of the entry plus a fallback expression and leaves its outcome in r, which is then compared with the stored expectation. */ + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_lane_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +/* Table-driven check of simde_vcmla_lane_f32: each entry stores 2-element r_, a and b, a lane and the expected 2-element r. With the #if 1 branch active the intrinsic is called with a literal lane of 0 (the lane field of the entry is not read by the call) and the result is compared with r; the #else branch instead emits randomly generated vectors through the simde_test write helpers and returns 1. */ +static int +test_simde_vcmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + simde_float32_t b[2]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 0), + {
SIMDE_FLOAT32_C(554878.125000), SIMDE_FLOAT32_C(555212.812500) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-7536.677246), SIMDE_FLOAT32_C(-5996.586914) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(267362.687500), SIMDE_FLOAT32_C(266017.968750) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-169232.828125), SIMDE_FLOAT32_C(-170505.734375) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(23602.720703), SIMDE_FLOAT32_C(22593.902344) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(40592.113281), SIMDE_FLOAT32_C(41962.363281) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-346414.437500), SIMDE_FLOAT32_C(-345866.750000) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-120500.015625), SIMDE_FLOAT32_C(-119691.234375) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; 
i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r = simde_vcmla_lane_f32(r_, a, b, 0); + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_lane_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +/* Table-driven check of simde_vcmla_laneq_f16: each entry stores 4-element r_ and a, an 8-element b, a lane and the expected 4-element r. With the #if 1 branch active every entry is replayed through SIMDE_CONSTIFY_2_ using its own lane and the result is compared with r; the #else branch instead emits randomly generated vectors through the simde_test write helpers and returns 1. */ +static int +test_simde_vcmla_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[4]; + simde_float16_t a[4]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-7264.000000),
SIMDE_FLOAT16_VALUE(-7308.000000), SIMDE_FLOAT16_VALUE(4584.000000), SIMDE_FLOAT16_VALUE(5504.000000) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(87.312500), SIMDE_FLOAT16_VALUE(-319.750000), SIMDE_FLOAT16_VALUE(3616.000000), SIMDE_FLOAT16_VALUE(4476.000000) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-5340.000000), SIMDE_FLOAT16_VALUE(-5256.000000), SIMDE_FLOAT16_VALUE(10224.000000), SIMDE_FLOAT16_VALUE(9984.000000) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { 
SIMDE_FLOAT16_VALUE(26144.000000), SIMDE_FLOAT16_VALUE(25920.000000), SIMDE_FLOAT16_VALUE(-23680.000000), SIMDE_FLOAT16_VALUE(-25360.000000) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(21776.000000), SIMDE_FLOAT16_VALUE(20304.000000), SIMDE_FLOAT16_VALUE(-45568.000000), SIMDE_FLOAT16_VALUE(-45248.000000) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(48256.000000), SIMDE_FLOAT16_VALUE(48544.000000), SIMDE_FLOAT16_VALUE(32704.000000), SIMDE_FLOAT16_VALUE(32352.000000) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + 
INT32_C( 0), + { SIMDE_FLOAT16_VALUE(19840.000000), SIMDE_FLOAT16_VALUE(20176.000000), SIMDE_FLOAT16_VALUE(26880.000000), SIMDE_FLOAT16_VALUE(28592.000000) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(56384.000000), SIMDE_FLOAT16_VALUE(56096.000000), SIMDE_FLOAT16_VALUE(-16768.000000), SIMDE_FLOAT16_VALUE(-18048.000000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, 
lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +/* Table-driven check of simde_vcmla_laneq_f32: each entry stores 2-element r_ and a, a 4-element b, a lane and the expected 2-element r. With the #if 1 branch active every entry is replayed through SIMDE_CONSTIFY_2_ using its own lane and the result is compared with r; the #else branch instead emits randomly generated vectors through the simde_test write helpers and returns 1. */ +static int +test_simde_vcmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-44964.726562), SIMDE_FLOAT32_C(-44412.597656) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-6814.092285), SIMDE_FLOAT32_C(-7088.232422) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(119040.617188), SIMDE_FLOAT32_C(119702.507812) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-18774.140625), SIMDE_FLOAT32_C(-19240.259766) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-75683.437500), SIMDE_FLOAT32_C(-75956.437500) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C(
413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-46967.093750), SIMDE_FLOAT32_C(-46950.054688) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-145833.875000), SIMDE_FLOAT32_C(-145761.453125) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(8569.627930), SIMDE_FLOAT32_C(8744.038086) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x2_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +/* Table-driven check of simde_vcmlaq_lane_f16: each entry stores 8-element r_ and a, a 4-element b, a lane and the expected 8-element r. With the #if 1 branch active every entry is replayed through SIMDE_CONSTIFY_2_ using its own lane and the result is compared with r; the #else branch instead emits randomly generated vectors through the simde_test write helpers and returns 1. */ +static int +test_simde_vcmlaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[4]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-43648.000000), SIMDE_FLOAT16_VALUE(-43712.000000), SIMDE_FLOAT16_VALUE(30640.000000), SIMDE_FLOAT16_VALUE(30880.000000), + SIMDE_FLOAT16_VALUE(-11448.000000), SIMDE_FLOAT16_VALUE(-10904.000000), SIMDE_FLOAT16_VALUE(26688.000000), SIMDE_FLOAT16_VALUE(27424.000000) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50) }, + {
SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(3430.000000), SIMDE_FLOAT16_VALUE(3588.000000), SIMDE_FLOAT16_VALUE(-48928.000000), SIMDE_FLOAT16_VALUE(-48800.000000), + SIMDE_FLOAT16_VALUE(30720.000000), SIMDE_FLOAT16_VALUE(30528.000000), SIMDE_FLOAT16_VALUE(42848.000000), SIMDE_FLOAT16_VALUE(43776.000000) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(16480.000000), SIMDE_FLOAT16_VALUE(17296.000000), SIMDE_FLOAT16_VALUE(18480.000000), SIMDE_FLOAT16_VALUE(18000.000000), + SIMDE_FLOAT16_VALUE(-17888.000000), SIMDE_FLOAT16_VALUE(-18064.000000), SIMDE_FLOAT16_VALUE(-24672.000000), SIMDE_FLOAT16_VALUE(-23072.000000) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) 
}, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(21504.000000), SIMDE_FLOAT16_VALUE(22064.000000), SIMDE_FLOAT16_VALUE(23696.000000), SIMDE_FLOAT16_VALUE(25104.000000), + SIMDE_FLOAT16_VALUE(-8448.000000), SIMDE_FLOAT16_VALUE(-8480.000000), SIMDE_FLOAT16_VALUE(3242.000000), SIMDE_FLOAT16_VALUE(3230.000000) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-7092.000000), SIMDE_FLOAT16_VALUE(-6896.000000), SIMDE_FLOAT16_VALUE(8960.000000), SIMDE_FLOAT16_VALUE(9568.000000), + SIMDE_FLOAT16_VALUE(6324.000000), SIMDE_FLOAT16_VALUE(5600.000000), SIMDE_FLOAT16_VALUE(7520.000000), SIMDE_FLOAT16_VALUE(5868.000000) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(770.000000), SIMDE_FLOAT16_VALUE(1455.000000), SIMDE_FLOAT16_VALUE(8176.000000), 
SIMDE_FLOAT16_VALUE(8584.000000), + SIMDE_FLOAT16_VALUE(-2040.000000), SIMDE_FLOAT16_VALUE(-1875.000000), SIMDE_FLOAT16_VALUE(-3288.000000), SIMDE_FLOAT16_VALUE(-3358.000000) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-25472.000000), SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-36736.000000), SIMDE_FLOAT16_VALUE(-38048.000000), + SIMDE_FLOAT16_VALUE(-6944.000000), SIMDE_FLOAT16_VALUE(-7372.000000), SIMDE_FLOAT16_VALUE(35744.000000), SIMDE_FLOAT16_VALUE(34176.000000) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-14208.000000), SIMDE_FLOAT16_VALUE(-13840.000000), SIMDE_FLOAT16_VALUE(-13016.000000), SIMDE_FLOAT16_VALUE(-12560.000000), + SIMDE_FLOAT16_VALUE(-7736.000000), SIMDE_FLOAT16_VALUE(-7184.000000), 
SIMDE_FLOAT16_VALUE(-6408.000000), SIMDE_FLOAT16_VALUE(-7472.000000) } } + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x8_t r; + SIMDE_CONSTIFY_2_(simde_vcmlaq_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[4]; + simde_float32_t a[4]; + simde_float32_t b[2]; + const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-249748.578125), SIMDE_FLOAT32_C(-249559.515625), 
SIMDE_FLOAT32_C(640137.687500), SIMDE_FLOAT32_C(640144.187500) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-78323.289062), SIMDE_FLOAT32_C(-78133.671875), SIMDE_FLOAT32_C(-107301.625000), SIMDE_FLOAT32_C(-108250.398438) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-254237.640625), SIMDE_FLOAT32_C(-254599.218750), SIMDE_FLOAT32_C(541767.562500), SIMDE_FLOAT32_C(541224.875000) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-66048.968750), SIMDE_FLOAT32_C(-65950.062500), SIMDE_FLOAT32_C(93407.554688), SIMDE_FLOAT32_C(92652.742188) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(16991.468750), SIMDE_FLOAT32_C(15174.667969), SIMDE_FLOAT32_C(10405.092773), SIMDE_FLOAT32_C(10204.472656) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( 
-333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(93642.765625), SIMDE_FLOAT32_C(94834.125000), SIMDE_FLOAT32_C(-242623.015625), SIMDE_FLOAT32_C(-242803.562500) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(138038.406250), SIMDE_FLOAT32_C(139079.562500), SIMDE_FLOAT32_C(-219419.500000), SIMDE_FLOAT32_C(-218382.046875) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-139701.843750), SIMDE_FLOAT32_C(-138290.437500), SIMDE_FLOAT32_C(-32857.097656), SIMDE_FLOAT32_C(-31991.136719) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, 0); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, 
lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(58400.000000), SIMDE_FLOAT16_VALUE(59040.000000), SIMDE_FLOAT16_VALUE(5488.000000), SIMDE_FLOAT16_VALUE(4376.000000), + SIMDE_FLOAT16_VALUE(7296.000000), SIMDE_FLOAT16_VALUE(8016.000000), SIMDE_FLOAT16_VALUE(-22048.000000), SIMDE_FLOAT16_VALUE(-21008.000000) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), 
SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 784.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -896.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-11536.000000), SIMDE_FLOAT16_VALUE(-11520.000000), SIMDE_FLOAT16_VALUE(-16336.000000), SIMDE_FLOAT16_VALUE(-17824.000000), + SIMDE_FLOAT16_VALUE(52640.000000), SIMDE_FLOAT16_VALUE(53216.000000), SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(38336.000000) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE(-16224.000000), SIMDE_FLOAT16_VALUE(-15608.000000), SIMDE_FLOAT16_VALUE(29552.000000), SIMDE_FLOAT16_VALUE(29264.000000), + SIMDE_FLOAT16_VALUE(-26304.000000), SIMDE_FLOAT16_VALUE(-25360.000000), SIMDE_FLOAT16_VALUE(7980.000000), SIMDE_FLOAT16_VALUE(6856.000000) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 
674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(29904.000000), SIMDE_FLOAT16_VALUE(30496.000000), SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-26512.000000), + SIMDE_FLOAT16_VALUE(-12880.000000), SIMDE_FLOAT16_VALUE(-11296.000000), SIMDE_FLOAT16_VALUE(19456.000000), SIMDE_FLOAT16_VALUE(18704.000000) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(2372.000000), SIMDE_FLOAT16_VALUE(1167.000000), SIMDE_FLOAT16_VALUE(18384.000000), SIMDE_FLOAT16_VALUE(17392.000000), + SIMDE_FLOAT16_VALUE(4904.000000), SIMDE_FLOAT16_VALUE(3678.000000), SIMDE_FLOAT16_VALUE(-13288.000000), SIMDE_FLOAT16_VALUE(-13520.000000) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 
164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-7536.000000), SIMDE_FLOAT16_VALUE(-6752.000000), SIMDE_FLOAT16_VALUE(11696.000000), SIMDE_FLOAT16_VALUE(10864.000000), + SIMDE_FLOAT16_VALUE(-734.500000), SIMDE_FLOAT16_VALUE(599.500000), SIMDE_FLOAT16_VALUE(-12600.000000), SIMDE_FLOAT16_VALUE(-12016.000000) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE(-21712.000000), SIMDE_FLOAT16_VALUE(-21360.000000), SIMDE_FLOAT16_VALUE(8288.000000), SIMDE_FLOAT16_VALUE(8456.000000), + SIMDE_FLOAT16_VALUE(-8084.000000), SIMDE_FLOAT16_VALUE(-7528.000000), 
SIMDE_FLOAT16_VALUE(19152.000000), SIMDE_FLOAT16_VALUE(18944.000000) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-13672.000000), SIMDE_FLOAT16_VALUE(-13552.000000), SIMDE_FLOAT16_VALUE(-18768.000000), SIMDE_FLOAT16_VALUE(-19184.000000), + SIMDE_FLOAT16_VALUE(39072.000000), SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(-23568.000000), SIMDE_FLOAT16_VALUE(-22672.000000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r; + // = simde_vcmlaq_laneq_f16(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + SIMDE_CONSTIFY_4_(simde_vcmlaq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + // write_f16x8(r); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + 
simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[4]; + simde_float32_t a[4]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(431843.781250), SIMDE_FLOAT32_C(431658.250000), SIMDE_FLOAT32_C(239604.218750), SIMDE_FLOAT32_C(239020.156250) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-185751.734375), SIMDE_FLOAT32_C(-186591.140625), SIMDE_FLOAT32_C(-1273.252075), SIMDE_FLOAT32_C(-1780.152100) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( 
-347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-242415.500000), SIMDE_FLOAT32_C(-243155.093750), SIMDE_FLOAT32_C(189533.046875), SIMDE_FLOAT32_C(189217.609375) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-485871.250000), SIMDE_FLOAT32_C(-487381.343750), SIMDE_FLOAT32_C(-588011.437500), SIMDE_FLOAT32_C(-587360.687500) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-326445.437500), SIMDE_FLOAT32_C(-326905.343750), SIMDE_FLOAT32_C(163765.484375), SIMDE_FLOAT32_C(163522.125000) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(711101.312500), SIMDE_FLOAT32_C(711655.625000), SIMDE_FLOAT32_C(487798.281250), SIMDE_FLOAT32_C(487838.125000) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) 
}, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(792643.500000), SIMDE_FLOAT32_C(791384.875000), SIMDE_FLOAT32_C(-192679.765625), SIMDE_FLOAT32_C(-192855.687500) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-153862.109375), SIMDE_FLOAT32_C(-152944.718750), SIMDE_FLOAT32_C(-160066.187500), SIMDE_FLOAT32_C(-158853.750000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmlaq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, 
lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_laneq_f32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" \ No newline at end of file From 596cafcb0b399c3de8bed45451b64bd362813620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 17:39:37 +0800 Subject: [PATCH 04/29] [Neon] Add vcmla_rot90_lane_f{16/32} and vcmla_rot90_laneq_f{16/32} and vcmlaq_rot90_lane_f{16/32} and vcmlaq_rot90_laneq_f{16/32} --- simde/arm/neon/cmla_rot90_lane.h | 321 +++++++++++++ test/arm/neon/cmla_rot90_lane.c | 762 +++++++++++++++++++++++++++++++ 2 files changed, 1083 insertions(+) create mode 100644 simde/arm/neon/cmla_rot90_lane.h create mode 100644 test/arm/neon/cmla_rot90_lane.c diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h new file mode 100644 index 000000000..86090922e --- /dev/null +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the 
Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+* Copyright:
+* 2023 Chi-Wei Chu
+*/
+
+#if !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H)
+#define SIMDE_ARM_NEON_CMLA_ROT90_LANE_H
+
+#include "types.h"
+#include "dup_lane.h"
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Common shape of every function in this header.
+ *
+ * Portable path: `b_` is built from the single element b[lane] via
+ * simde_vdup_n_* / simde_vdupq_n_*, then for each pair index i:
+ *
+ *   r[2*i]     += -(b_[2*i + 1]) * a[2*i + 1];
+ *   r[2*i + 1] +=   b_[2*i]      * a[2*i + 1];
+ *
+ * The SIMDE_SHUFFLE_VECTOR_ variant computes the same thing by turning `a`
+ * into (a1, a1, a3, a3, ...) and `b_` into (-b1, b0, -b3, b2, ...).
+ *
+ * Native path: taken when SIMDE_ARM_NEON_A32V8_NATIVE is defined, the
+ * architecture check for 8.3 passes and the compiler version checks pass;
+ * it forwards to the intrinsic of the same name.
+ *
+ * The second argument of SIMDE_SHUFFLE_VECTOR_ is written as the vector
+ * size in bytes, matching the f32 calls in this file (8 for float32x2,
+ * 16 for float32x4); the f16 calls use 8 (float16x4) and 16 (float16x8).
+ *
+ * NOTE(review): the accepted lane ranges equal the number of (re, im)
+ * pairs in `b`, yet the portable path broadcasts the single scalar
+ * b[lane]. Confirm against Arm's FCMLA (by element) definition whether
+ * the pair (b[2*lane], b[2*lane + 1]) was intended. The accompanying
+ * test vectors appear to have been generated from the current behaviour. */
+
+/* r (float16x4) += rot90 product of a (float16x4) with lane `lane` (0..1)
+ * of b (float16x4). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4_t
+simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float16x4_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmla_rot90_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x4_private
+      r_ = simde_float16x4_to_private(r),
+      a_ = simde_float16x4_to_private(a),
+      b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 4, 3, 6);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+    result = simde_float16x4_from_private(r_);
+  #endif
+  /* Returned outside the #if so the native branch, which only assigns
+   * `result`, does not fall off the end of the function. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot90_lane_f16
+  #define vcmla_rot90_lane_f16(r, a, b, lane) simde_vcmla_rot90_lane_f16(r, a, b, lane)
+#endif
+
+/* r (float32x2) += rot90 product of a (float32x2) with lane `lane` (only 0
+ * is accepted) of b (float32x2). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2_t
+simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    return vcmla_rot90_lane_f32(r, a, b, 0);
+  #else
+    simde_float32x2_private
+      r_ = simde_float32x2_to_private(r),
+      a_ = simde_float32x2_to_private(a),
+      b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+
+    return simde_float32x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot90_lane_f32
+  #define vcmla_rot90_lane_f32(r, a, b, lane) simde_vcmla_rot90_lane_f32(r, a, b, lane)
+#endif
+
+/* r (float16x8) += rot90 product of a (float16x8) with lane `lane` (0..1)
+ * of b (float16x4). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8_t
+simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float16x8_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmlaq_rot90_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x8_private
+      r_ = simde_float16x8_to_private(r),
+      a_ = simde_float16x8_to_private(a),
+      b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+
+    result = simde_float16x8_from_private(r_);
+  #endif
+  /* Shared return: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot90_lane_f16
+  #define vcmlaq_rot90_lane_f16(r, a, b, lane) simde_vcmlaq_rot90_lane_f16(r, a, b, lane)
+#endif
+
+/* r (float32x4) += rot90 product of a (float32x4) with lane `lane` (only 0
+ * is accepted) of b (float32x2). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4_t
+simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    return vcmlaq_rot90_lane_f32(r, a, b, 0);
+  #else
+    simde_float32x4_private
+      r_ = simde_float32x4_to_private(r),
+      a_ = simde_float32x4_to_private(a),
+      b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+
+    return simde_float32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot90_lane_f32
+  #define vcmlaq_rot90_lane_f32(r, a, b, lane) simde_vcmlaq_rot90_lane_f32(r, a, b, lane)
+#endif
+
+/* r (float16x4) += rot90 product of a (float16x4) with lane `lane` (0..1)
+ * of b (float16x8). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4_t
+simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float16x4_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x4_private
+      r_ = simde_float16x4_to_private(r),
+      a_ = simde_float16x4_to_private(a),
+      b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 4, 3, 6);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+
+    result = simde_float16x4_from_private(r_);
+  #endif
+  /* Shared return: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot90_laneq_f16
+  #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane)
+#endif
+
+/* r (float32x2) += rot90 product of a (float32x2) with lane `lane` (0..1)
+ * of b (float32x4). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2_t
+simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float32x2_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float32x2_private
+      r_ = simde_float32x2_to_private(r),
+      a_ = simde_float32x2_to_private(a),
+      b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+
+    result = simde_float32x2_from_private(r_);
+  #endif
+  /* Shared return: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot90_laneq_f32
+  #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane)
+#endif
+
+/* r (float16x8) += rot90 product of a (float16x8) with lane `lane` (0..3)
+ * of b (float16x8). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8_t
+simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_float16x8_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_4_(vcmlaq_rot90_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x8_private
+      r_ = simde_float16x8_to_private(r),
+      a_ = simde_float16x8_to_private(a),
+      b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+
+    result = simde_float16x8_from_private(r_);
+  #endif
+  /* Shared return: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot90_laneq_f16
+  #define vcmlaq_rot90_laneq_f16(r, a, b, lane) simde_vcmlaq_rot90_laneq_f16(r, a, b, lane)
+#endif
+
+/* r (float32x4) += rot90 product of a (float32x4) with lane `lane` (0..1)
+ * of b (float32x4). */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4_t
+simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float32x4_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmlaq_rot90_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float32x4_private
+      r_ = simde_float32x4_to_private(r),
+      a_ = simde_float32x4_to_private(a),
+      b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1];
+        r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1];
+      }
+    #endif
+    result = simde_float32x4_from_private(r_);
+  #endif
+  /* Shared return: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot90_laneq_f32
+  #define vcmlaq_rot90_laneq_f32(r, a, b, lane) simde_vcmlaq_rot90_laneq_f32(r, a, b, lane)
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */
\ No newline at end of file
diff --git a/test/arm/neon/cmla_rot90_lane.c b/test/arm/neon/cmla_rot90_lane.c
new file mode 100644
index 000000000..076a2fc49
--- /dev/null
+++ b/test/arm/neon/cmla_rot90_lane.c
@@ -0,0 +1,762 @@
+#define SIMDE_TEST_ARM_NEON_INSN cmla_rot90_lane
+
+#include "test-neon.h"
+#include "../../../simde/arm/neon/cmla_rot90_lane.h"
+#include "../../../simde/arm/neon/dup_n.h"
+
+static int
+test_simde_vcmla_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) {
+#if 1
+  static const struct {
+    simde_float16_t r_[4];
+    simde_float16_t a[4];
+    simde_float16_t b[4];
+    const int lane;
+    simde_float16_t r[4];
+  } test_vec[] = {
+    {
+      { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) },
+      { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) },
+      { 
SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -487.75), SIMDE_FLOAT16_VALUE( 329.50), SIMDE_FLOAT16_VALUE( 711.50), SIMDE_FLOAT16_VALUE( -1905.00) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 1593.00), SIMDE_FLOAT16_VALUE( -760.00), SIMDE_FLOAT16_VALUE(-44736.00), SIMDE_FLOAT16_VALUE( 45248.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-59136.00), SIMDE_FLOAT16_VALUE( 58496.00), SIMDE_FLOAT16_VALUE( 28944.00), SIMDE_FLOAT16_VALUE(-29152.00) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-30800.00), SIMDE_FLOAT16_VALUE( 30688.00), SIMDE_FLOAT16_VALUE( 17888.00), SIMDE_FLOAT16_VALUE(-18208.00) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { 
SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-15688.00), SIMDE_FLOAT16_VALUE( 15856.00), SIMDE_FLOAT16_VALUE(-42656.00), SIMDE_FLOAT16_VALUE( 42656.00) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 1813.00), SIMDE_FLOAT16_VALUE( -1920.00), SIMDE_FLOAT16_VALUE(-46528.00), SIMDE_FLOAT16_VALUE( 46752.00) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-14712.00), SIMDE_FLOAT16_VALUE( 14672.00), SIMDE_FLOAT16_VALUE( -1574.00), SIMDE_FLOAT16_VALUE( 1653.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 13160.00), SIMDE_FLOAT16_VALUE(-14744.00), SIMDE_FLOAT16_VALUE(-11400.00), SIMDE_FLOAT16_VALUE( 12432.00) } } + + }; + 
+  /* Check mode: for every stored case, load the accumulator (r_) and the two
+   * operands, evaluate simde_vcmla_rot90_lane_f16 and compare with the stored
+   * expectation (r). test_vec[i].lane is a run-time value, so it is routed
+   * through SIMDE_CONSTIFY_2_ (presumably a switch over lanes 0 and 1 that
+   * calls the function with a literal lane; confirm against the macro). */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
+    simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_);
+    simde_float16x4_t a = simde_vld1_f16(test_vec[i].a);
+    simde_float16x4_t b = simde_vld1_f16(test_vec[i].b);
+    simde_float16x4_t r;
+    SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b);
+    // simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, test_vec[i].lane);
+
+    simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1);
+    // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+
+  return 0;
+#else
+  /* Generator mode (compiled only when the `#if 1` of this function is
+   * flipped): eight rounds of random operands, lanes alternating 0/1. The
+   * inputs, the lane and the computed result are passed to the
+   * simde_test_*_write helpers, and the function returns 1 instead of 0. */
+  fputc('\n', stdout);
+  const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 };
+  for (int i = 0 ; i < 8 ; i++) {
+    simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f);
+    simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f);
+    simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f);
+    simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, lanes[i]);
+
+
+    simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST);
+    simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+  return 1;
+#endif
+}
+
+/* Test for simde_vcmla_rot90_lane_f32. Each test_vec entry holds the
+ * accumulator input (r_), the two operands (a, b), the lane index and the
+ * expected result (r). */
+static int
+test_simde_vcmla_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) {
+#if 1
+  static const struct {
+    simde_float32_t r_[2];
+    simde_float32_t a[2];
+    simde_float32_t b[2];
+    const int lane;
+    simde_float32_t r[2];
+  } test_vec[] = {
+    { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) },
+      { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) },
+      { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) },
+      INT32_C( 0),
+      { SIMDE_FLOAT32_C(526255.250000), SIMDE_FLOAT32_C(-525754.125000) } },
+    { { SIMDE_FLOAT32_C( -890.17),
SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(48732.140625), SIMDE_FLOAT32_C(-48972.390625) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(449656.687500), SIMDE_FLOAT32_C(-449956.781250) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-55698.687500), SIMDE_FLOAT32_C(55384.136719) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(12792.236328), SIMDE_FLOAT32_C(-13138.096680) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-33040.417969), SIMDE_FLOAT32_C(33323.867188) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-313060.062500), SIMDE_FLOAT32_C(312210.093750) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-181416.578125), SIMDE_FLOAT32_C(180989.468750) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = 
simde_vld1_f32(test_vec[i].a);
+    simde_float32x2_t b = simde_vld1_f32(test_vec[i].b);
+    /* The lane is the literal 0 rather than test_vec[i].lane: every stored
+     * case, and the generator's lanes[] below, use lane 0 only. */
+    simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, 0);
+    // SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b);
+    // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST);
+    simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1);
+  }
+
+  return 0;
+#else
+  /* Generator mode (compiled only when the `#if 1` of this function is
+   * flipped): eight rounds of random operands, always lane 0. The inputs,
+   * the lane and the computed result are passed to the simde_test_*_write
+   * helpers, and the function returns 1 instead of 0. */
+  fputc('\n', stdout);
+  const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+  for (int i = 0 ; i < 8 ; i++) {
+    simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f);
+    simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f);
+    simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f);
+    simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, lanes[i]);
+
+
+    simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST);
+    simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+  return 1;
+#endif
+}
+
+
+/* Test for simde_vcmla_rot90_laneq_f16. Each test_vec entry holds the
+ * accumulator input (r_, 4 x f16), operand a (4 x f16), operand b (8 x f16),
+ * the lane index and the expected result (r, 4 x f16). */
+static int
+test_simde_vcmla_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) {
+#if 1
+  static const struct {
+    simde_float16_t r_[4];
+    simde_float16_t a[4];
+    simde_float16_t b[8];
+    const int lane;
+    simde_float16_t r[4];
+  } test_vec[] = {
+    { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) },
+      { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) },
+      { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00),
+        SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50)
}, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-10760.00), SIMDE_FLOAT16_VALUE( 12344.00), SIMDE_FLOAT16_VALUE( 22960.00), SIMDE_FLOAT16_VALUE(-22800.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -4952.00), SIMDE_FLOAT16_VALUE( 5132.00), SIMDE_FLOAT16_VALUE( -2424.00), SIMDE_FLOAT16_VALUE( 3010.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 4844.00), SIMDE_FLOAT16_VALUE( -5432.00), SIMDE_FLOAT16_VALUE( -7100.00), SIMDE_FLOAT16_VALUE( 7316.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { 
SIMDE_FLOAT16_VALUE( 5440.00), SIMDE_FLOAT16_VALUE( -6080.00), SIMDE_FLOAT16_VALUE(-39584.00), SIMDE_FLOAT16_VALUE( 39552.00) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 9408.00), SIMDE_FLOAT16_VALUE( -9792.00), SIMDE_FLOAT16_VALUE( -5892.00), SIMDE_FLOAT16_VALUE( 6252.00) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 15872.00), SIMDE_FLOAT16_VALUE(-15096.00), SIMDE_FLOAT16_VALUE( 27792.00), SIMDE_FLOAT16_VALUE(-28256.00) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 
26032.00), SIMDE_FLOAT16_VALUE(-25536.00), SIMDE_FLOAT16_VALUE( 19392.00), SIMDE_FLOAT16_VALUE(-19440.00) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 19088.00), SIMDE_FLOAT16_VALUE(-18048.00), SIMDE_FLOAT16_VALUE( -8044.00), SIMDE_FLOAT16_VALUE( 7728.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x4_t r; + // simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + SIMDE_CONSTIFY_2_(simde_vcmla_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); 
+ simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmla_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-611578.125000), SIMDE_FLOAT32_C(611189.750000) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(5702.652344), SIMDE_FLOAT32_C(-6640.392578) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(36856.097656), SIMDE_FLOAT32_C(-36254.929688) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(237778.359375), SIMDE_FLOAT32_C(-236252.765625) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { 
SIMDE_FLOAT32_C(-104517.312500), SIMDE_FLOAT32_C(104795.734375) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-89676.710938), SIMDE_FLOAT32_C(90485.789062) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(417134.156250), SIMDE_FLOAT32_C(-417365.687500) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(33791.917969), SIMDE_FLOAT32_C(-33409.250000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x2_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot90_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t b = 
simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[4]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-32544.00), SIMDE_FLOAT16_VALUE( 32960.00), SIMDE_FLOAT16_VALUE( 9592.00), SIMDE_FLOAT16_VALUE( -8984.00), + SIMDE_FLOAT16_VALUE(-16912.00), SIMDE_FLOAT16_VALUE( 16064.00), SIMDE_FLOAT16_VALUE(-23056.00), SIMDE_FLOAT16_VALUE( 23024.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( 
-52.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 2708.00), SIMDE_FLOAT16_VALUE( -4288.00), SIMDE_FLOAT16_VALUE( -6328.00), SIMDE_FLOAT16_VALUE( 7364.00), + SIMDE_FLOAT16_VALUE(-29792.00), SIMDE_FLOAT16_VALUE( 28032.00), SIMDE_FLOAT16_VALUE( -4460.00), SIMDE_FLOAT16_VALUE( 4260.00) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE( 20096.00), SIMDE_FLOAT16_VALUE(-14408.00), SIMDE_FLOAT16_VALUE( 13176.00), + SIMDE_FLOAT16_VALUE( 11256.00), SIMDE_FLOAT16_VALUE( -9760.00), SIMDE_FLOAT16_VALUE( -8816.00), SIMDE_FLOAT16_VALUE( 8992.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { 
SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 11456.00), SIMDE_FLOAT16_VALUE(-12152.00), SIMDE_FLOAT16_VALUE( 13792.00), SIMDE_FLOAT16_VALUE(-13576.00), + SIMDE_FLOAT16_VALUE( 21408.00), SIMDE_FLOAT16_VALUE(-19712.00), SIMDE_FLOAT16_VALUE( 29168.00), SIMDE_FLOAT16_VALUE(-30320.00) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 1032.00), SIMDE_FLOAT16_VALUE( -1878.00), SIMDE_FLOAT16_VALUE(-10288.00), SIMDE_FLOAT16_VALUE( 9120.00), + SIMDE_FLOAT16_VALUE( 6724.00), SIMDE_FLOAT16_VALUE( -7684.00), SIMDE_FLOAT16_VALUE( -1961.00), SIMDE_FLOAT16_VALUE( 1639.00) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -567.50), 
SIMDE_FLOAT16_VALUE( 373.00), SIMDE_FLOAT16_VALUE( 2088.00), SIMDE_FLOAT16_VALUE( -1456.00), + SIMDE_FLOAT16_VALUE( 7692.00), SIMDE_FLOAT16_VALUE( -7356.00), SIMDE_FLOAT16_VALUE( 244.75), SIMDE_FLOAT16_VALUE( -1675.00) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 30416.00), SIMDE_FLOAT16_VALUE(-31296.00), SIMDE_FLOAT16_VALUE( 48448.00), SIMDE_FLOAT16_VALUE(-49024.00), + SIMDE_FLOAT16_VALUE(-16512.00), SIMDE_FLOAT16_VALUE( 16464.00), SIMDE_FLOAT16_VALUE( -4848.00), SIMDE_FLOAT16_VALUE( 5128.00) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-14320.00), SIMDE_FLOAT16_VALUE( 13240.00), SIMDE_FLOAT16_VALUE( 7448.00), SIMDE_FLOAT16_VALUE( -6452.00), + SIMDE_FLOAT16_VALUE( 12808.00), SIMDE_FLOAT16_VALUE(-13192.00), 
SIMDE_FLOAT16_VALUE( 6108.00), SIMDE_FLOAT16_VALUE( -7028.00) } }
+
+  };
+
+  /* Check mode: for every stored case, load the accumulator (r_, 8 x f16),
+   * operand a (8 x f16) and operand b (4 x f16), evaluate
+   * simde_vcmlaq_rot90_lane_f16 with the stored lane (routed through
+   * SIMDE_CONSTIFY_2_ because the lane is a run-time value here) and compare
+   * with the stored expectation (r). */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
+    simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_);
+    simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a);
+    simde_float16x4_t b = simde_vld1_f16(test_vec[i].b);
+    simde_float16x8_t r;
+    SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b);
+
+    simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1);
+    // simde_float16x8_t r = simde_vcmlaq_rot90_lane_f16(r_, a, b, test_vec[i].lane);
+    // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+
+  return 0;
+#else
+  /* Generator mode (compiled only when the `#if 1` of this function is
+   * flipped): eight rounds of random operands, lanes alternating 0/1. The
+   * inputs, the lane and the computed result are passed to the
+   * simde_test_*_write helpers, and the function returns 1 instead of 0. */
+  fputc('\n', stdout);
+  const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 };
+  for (int i = 0 ; i < 8 ; i++) {
+    simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f);
+    simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f);
+    simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f);
+    /* Must be the function under test (the check loop above exercises
+     * simde_vcmlaq_rot90_lane_f16), not the non-rotated simde_vcmlaq_lane_f16. */
+    simde_float16x8_t r = simde_vcmlaq_rot90_lane_f16(r_, a, b, lanes[i]);
+
+
+    simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST);
+    simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+  return 1;
+#endif
+}
+
+static int
+test_simde_vcmlaq_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) {
+#if 1
+  static const struct {
+    simde_float32_t r_[4];
+    simde_float32_t a[4];
+    simde_float32_t b[2];
+    const int lane;
+    simde_float32_t r[4];
+  } test_vec[] = {
+    { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) },
+      { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, +
{ SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(751066.625000), SIMDE_FLOAT32_C(-750274.312500), SIMDE_FLOAT32_C(-194766.031250), SIMDE_FLOAT32_C(193283.203125) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-1839.301758), SIMDE_FLOAT32_C(1952.901733), SIMDE_FLOAT32_C(90457.515625), SIMDE_FLOAT32_C(-91003.382812) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(683951.562500), SIMDE_FLOAT32_C(-683101.437500), SIMDE_FLOAT32_C(-562675.500000), SIMDE_FLOAT32_C(561279.812500) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-111859.632812), SIMDE_FLOAT32_C(110745.617188), SIMDE_FLOAT32_C(120082.710938), SIMDE_FLOAT32_C(-120974.000000) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(16234.529297), SIMDE_FLOAT32_C(-16147.649414), SIMDE_FLOAT32_C(-8252.643555), SIMDE_FLOAT32_C(9993.203125) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 
554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-699819.562500), SIMDE_FLOAT32_C(699737.562500), SIMDE_FLOAT32_C(-51018.167969), SIMDE_FLOAT32_C(50066.816406) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(77733.710938), SIMDE_FLOAT32_C(-77116.382812), SIMDE_FLOAT32_C(-5138.084473), SIMDE_FLOAT32_C(5225.284668) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-353784.031250), SIMDE_FLOAT32_C(353405.468750), SIMDE_FLOAT32_C(346695.718750), SIMDE_FLOAT32_C(-346168.875000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x4_t r = simde_vcmlaq_rot90_lane_f32(r_, a, b, 0); + // SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + 
simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_rot90_lane_f32(r_, a, b, lanes[i]); /* generate expected values with the intrinsic under test, not the unrotated vcmlaq_lane_f32 */ + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-31088.00), SIMDE_FLOAT16_VALUE( 31696.00), SIMDE_FLOAT16_VALUE(-54688.00), SIMDE_FLOAT16_VALUE( 55296.00), + SIMDE_FLOAT16_VALUE( 14808.00), SIMDE_FLOAT16_VALUE(-13808.00), SIMDE_FLOAT16_VALUE(-16400.00), SIMDE_FLOAT16_VALUE( 15776.00) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 
413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 6640.00), SIMDE_FLOAT16_VALUE( -5832.00), SIMDE_FLOAT16_VALUE( 48800.00), SIMDE_FLOAT16_VALUE(-49248.00), + SIMDE_FLOAT16_VALUE( -7264.00), SIMDE_FLOAT16_VALUE( 6500.00), SIMDE_FLOAT16_VALUE( 7096.00), SIMDE_FLOAT16_VALUE( -7912.00) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( -3164.00), SIMDE_FLOAT16_VALUE( 2328.00), SIMDE_FLOAT16_VALUE( 9440.00), SIMDE_FLOAT16_VALUE( -9864.00), + SIMDE_FLOAT16_VALUE(-14032.00), SIMDE_FLOAT16_VALUE( 14504.00), SIMDE_FLOAT16_VALUE( -7252.00), SIMDE_FLOAT16_VALUE( 
7956.00) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-27744.00), SIMDE_FLOAT16_VALUE( 26544.00), SIMDE_FLOAT16_VALUE(-20944.00), SIMDE_FLOAT16_VALUE( 19696.00), + SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE( 20992.00), SIMDE_FLOAT16_VALUE( 24256.00), SIMDE_FLOAT16_VALUE(-23472.00) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-18960.00), SIMDE_FLOAT16_VALUE( 18816.00), SIMDE_FLOAT16_VALUE( 17920.00), SIMDE_FLOAT16_VALUE(-17920.00), + SIMDE_FLOAT16_VALUE( 9928.00), 
SIMDE_FLOAT16_VALUE( -9320.00), SIMDE_FLOAT16_VALUE(-16072.00), SIMDE_FLOAT16_VALUE( 15032.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -8824.00), SIMDE_FLOAT16_VALUE( 8368.00), SIMDE_FLOAT16_VALUE( -4568.00), SIMDE_FLOAT16_VALUE( 4344.00), + SIMDE_FLOAT16_VALUE( 8800.00), SIMDE_FLOAT16_VALUE( -8672.00), SIMDE_FLOAT16_VALUE( -5792.00), SIMDE_FLOAT16_VALUE( 6468.00) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 18624.00), SIMDE_FLOAT16_VALUE(-19920.00), SIMDE_FLOAT16_VALUE( 8288.00), 
SIMDE_FLOAT16_VALUE( -9184.00), + SIMDE_FLOAT16_VALUE(-20576.00), SIMDE_FLOAT16_VALUE( 21408.00), SIMDE_FLOAT16_VALUE( 22928.00), SIMDE_FLOAT16_VALUE(-21728.00) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE( 32496.00), SIMDE_FLOAT16_VALUE(-31952.00), SIMDE_FLOAT16_VALUE( 20992.00), SIMDE_FLOAT16_VALUE(-20752.00), + SIMDE_FLOAT16_VALUE( 34656.00), SIMDE_FLOAT16_VALUE(-35072.00), SIMDE_FLOAT16_VALUE( 43648.00), SIMDE_FLOAT16_VALUE(-42976.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r; + // simde_float16x8_t r = simde_vcmlaq_rot90_laneq_f16(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + SIMDE_CONSTIFY_4_(simde_vcmlaq_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { + 
simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcmlaq_rot90_laneq_f16(r_, a, b, lanes[i]); /* generate expected values with the intrinsic under test, not the unrotated vcmlaq_laneq_f16 */ + + + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[4]; + simde_float32_t a[4]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-838971.375000), SIMDE_FLOAT32_C(839496.187500), SIMDE_FLOAT32_C(-782261.625000), SIMDE_FLOAT32_C(781911.312500) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-82460.406250), SIMDE_FLOAT32_C(81505.664062), SIMDE_FLOAT32_C(-105654.820312), SIMDE_FLOAT32_C(105385.882812) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 
628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-271762.062500), SIMDE_FLOAT32_C(270583.375000), SIMDE_FLOAT32_C(441852.812500), SIMDE_FLOAT32_C(-440280.406250) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-734530.875000), SIMDE_FLOAT32_C(734586.937500), SIMDE_FLOAT32_C(753882.687500), SIMDE_FLOAT32_C(-754404.875000) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(454093.875000), SIMDE_FLOAT32_C(-452557.187500), SIMDE_FLOAT32_C(-338388.906250), SIMDE_FLOAT32_C(337763.312500) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(590198.187500), SIMDE_FLOAT32_C(-589538.625000), SIMDE_FLOAT32_C(165896.703125), SIMDE_FLOAT32_C(-167645.296875) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 
856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(242423.562500), SIMDE_FLOAT32_C(-242568.734375), SIMDE_FLOAT32_C(-26172.757812), SIMDE_FLOAT32_C(25283.837891) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(125878.625000), SIMDE_FLOAT32_C(-125534.804688), SIMDE_FLOAT32_C(138068.187500), SIMDE_FLOAT32_C(-138834.203125) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + // simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot90_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot90_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot90_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot90_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot90_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot90_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot90_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot90_laneq_f32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" \ No newline at end of file From 4b6c6c7c4e872819c84737421f131c1091ef2595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 17:39:56 +0800 Subject: [PATCH 05/29] [Neon] Add vcmla_rot180_lane_f{16/32} and vcmla_rot180_laneq_f{16/32} and vcmlaq_rot180_lane_f{16/32} and vcmlaq_rot180_laneq_f{16/32} --- simde/arm/neon/cmla_rot180_lane.h | 328 +++++++++++++ test/arm/neon/cmla_rot180_lane.c | 766 ++++++++++++++++++++++++++++++ 2 files changed, 1094 insertions(+) create mode 100644 simde/arm/neon/cmla_rot180_lane.h create mode 100644 test/arm/neon/cmla_rot180_lane.c diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h new file mode 100644 index 000000000..f41fd8fd9 --- /dev/null +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies 
+* of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+* Copyright:
+*   2023      Chi-Wei Chu
+*/
+
+#if !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H)
+#define SIMDE_ARM_NEON_CMLA_ROT180_LANE_H
+
+#include "types.h"
+#include "dup_lane.h"
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* Emulates the Arm intrinsic vcmla_rot180_lane_f16.
+ *
+ * r    : accumulator, treated as interleaved element pairs.
+ * a    : multiplicand; only its even-indexed elements are read.
+ * b    : multiplier source; the portable path uses only the scalar b[lane].
+ * lane : compile-time constant in [0, 1].
+ *
+ * Returns r updated, for every pair index i, as
+ *   r[2i]     += -b[lane] * a[2i]
+ *   r[2i + 1] += -b[lane] * a[2i]
+ * The native intrinsic is used instead when the guard below holds.
+ *
+ * NOTE(review): the portable path broadcasts ONE scalar of b into both
+ * slots of every pair; confirm this agrees with the native intrinsic,
+ * which presumably selects a (real, imaginary) pair of b by `lane`. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4_t
+simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane)
+  SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float16x4_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmla_rot180_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+
+  #else
+    simde_float16x4_private
+      r_ = simde_float16x4_to_private(r),
+      a_ = simde_float16x4_to_private(a),
+      b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      /* Duplicate each even element of a into both slots of its pair. */
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2);
+      /* Indices 0..3 all select from the first operand, i.e. -b_. */
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    result = simde_float16x4_from_private(r_);
+  #endif
+
+  /* Shared exit: the native branch only assigns `result`, so a return
+   * placed inside the fallback branch left native builds without one. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot180_lane_f16
+  #define vcmla_rot180_lane_f16(r, a, b, lane) simde_vcmla_rot180_lane_f16(r, a, b, lane)
+#endif
+
+/* Emulates the Arm intrinsic vcmla_rot180_lane_f32.
+ *
+ * Same accumulation as simde_vcmla_rot180_lane_f16 on a single pair of
+ * 32-bit floats:
+ *   r[0] += -b[lane] * a[0]
+ *   r[1] += -b[lane] * a[0]
+ * `lane` is restricted to 0; the native call passes the literal 0. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2_t
+simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane)
+  SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    return vcmla_rot180_lane_f32(r, a, b, 0);
+  #else
+    simde_float32x2_private
+      r_ = simde_float32x2_to_private(r),
+      a_ = simde_float32x2_to_private(a),
+      b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    return simde_float32x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot180_lane_f32
+  #define vcmla_rot180_lane_f32(r, a, b, lane) simde_vcmla_rot180_lane_f32(r, a, b, lane)
+#endif
+
+/* Emulates the Arm intrinsic vcmlaq_rot180_lane_f16 (8-element r and a,
+ * 4-element b).
+ *
+ * lane : compile-time constant in [0, 1]; the portable path reads only
+ *        the scalar b[lane].
+ *
+ * Returns r updated, for every pair index i, as
+ *   r[2i]     += -b[lane] * a[2i]
+ *   r[2i + 1] += -b[lane] * a[2i]
+ *
+ * NOTE(review): the portable path broadcasts ONE scalar of b; confirm it
+ * agrees with the native intrinsic, which presumably selects a
+ * (real, imaginary) pair of b by `lane`. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8_t
+simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane)
+  SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float16x8_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmlaq_rot180_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x8_private
+      r_ = simde_float16x8_to_private(r),
+      a_ = simde_float16x8_to_private(a),
+      b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      /* Duplicate each even element of a into both slots of its pair. */
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6);
+      /* Indices 0..7 all select from the first operand, i.e. -b_. */
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    result = simde_float16x8_from_private(r_);
+  #endif
+
+  /* Shared exit: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot180_lane_f16
+  #define vcmlaq_rot180_lane_f16(r, a, b, lane) simde_vcmlaq_rot180_lane_f16(r, a, b, lane)
+#endif
+
+/* Emulates the Arm intrinsic vcmlaq_rot180_lane_f32 (4-element r and a,
+ * 2-element b).  Same accumulation as above; `lane` is restricted to 0 and
+ * the native call passes the literal 0. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4_t
+simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane)
+  SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) {
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    return vcmlaq_rot180_lane_f32(r, a, b, 0);
+  #else
+    simde_float32x4_private
+      r_ = simde_float32x4_to_private(r),
+      a_ = simde_float32x4_to_private(a),
+      b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    return simde_float32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot180_lane_f32
+  #define vcmlaq_rot180_lane_f32(r, a, b, lane) simde_vcmlaq_rot180_lane_f32(r, a, b, lane)
+#endif
+
+
+/* Emulates the Arm intrinsic vcmla_rot180_laneq_f16 (4-element r and a,
+ * 8-element b).  Same accumulation as above using the scalar b[lane].
+ *
+ * NOTE(review): `lane` is limited to [0, 1] here although b has 8 elements
+ * and simde_vcmlaq_rot180_laneq_f16 accepts [0, 3]; confirm the intended
+ * range against the ACLE specification. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4_t
+simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane)
+  SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float16x4_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x4_private
+      r_ = simde_float16x4_to_private(r),
+      a_ = simde_float16x4_to_private(a),
+      b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    result = simde_float16x4_from_private(r_);
+  #endif
+
+  /* Shared exit: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot180_laneq_f16
+  #define vcmla_rot180_laneq_f16(r, a, b, lane) simde_vcmla_rot180_laneq_f16(r, a, b, lane)
+#endif
+
+/* Emulates the Arm intrinsic vcmla_rot180_laneq_f32 (2-element r and a,
+ * 4-element b).  Same accumulation as above using the scalar b[lane];
+ * `lane` is a compile-time constant in [0, 1]. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2_t
+simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane)
+  SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float32x2_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float32x2_private
+      r_ = simde_float32x2_to_private(r),
+      a_ = simde_float32x2_to_private(a),
+      b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0);
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    result = simde_float32x2_from_private(r_);
+  #endif
+
+  /* Shared exit: the native branch only assigns `result`. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmla_rot180_laneq_f32
+  
#define vcmla_rot180_laneq_f32(r, a, b, lane) simde_vcmla_rot180_laneq_f32(r, a, b, lane)
+#endif
+
+
+
+/* Portable vcmlaq_rot180_laneq_f16.
+ *
+ *   r    accumulator, eight float16 elements handled as four (even, odd) pairs
+ *   a    multiplicand, same layout as r
+ *   b    eight float16 elements; only b[lane] is read by the portable path
+ *   lane compile-time constant in [0, 3]
+ *
+ * Portable path, for every pair i:
+ *   r[2i]     += -b[lane] * a[2i]
+ *   r[2i + 1] += -b[lane] * a[2i]
+ * Returns the updated accumulator.
+ *
+ * NOTE(review): `lane` picks one scalar of b and broadcasts it, so both
+ * halves of a pair use the same multiplier -- confirm this is the intended
+ * reading of the Arm by-element form, which addresses b as complex pairs.
+ */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8_t
+simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
+  simde_float16x8_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    /* Must be the f16 intrinsic: the operands are float16x8_t. */
+    SIMDE_CONSTIFY_4_(vcmlaq_rot180_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float16x8_private
+      r_ = simde_float16x8_to_private(r),
+      a_ = simde_float16x8_to_private(a),
+      b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      /* Copy each even element of a into the odd slot next to it. */
+      a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6);
+      /* Indices 0..7 all come from the first operand, i.e. the negated b. */
+      b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    result = simde_float16x8_from_private(r_);
+  #endif
+
+  /* Shared by both branches: previously the native branch had no return. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot180_laneq_f16
+  #define vcmlaq_rot180_laneq_f16(r, a, b, lane) simde_vcmlaq_rot180_laneq_f16(r, a, b, lane)
+#endif
+
+/* Portable vcmlaq_rot180_laneq_f32.
+ *
+ *   r    accumulator, four float32 elements handled as two (even, odd) pairs
+ *   a    multiplicand, same layout as r
+ *   b    four float32 elements; only b[lane] is read by the portable path
+ *   lane compile-time constant in [0, 1]
+ *
+ * Portable path, for every pair i:
+ *   r[2i]     += -b[lane] * a[2i]
+ *   r[2i + 1] += -b[lane] * a[2i]
+ * Returns the updated accumulator.
+ *
+ * NOTE(review): b[lane] is a single broadcast scalar, not a complex pair --
+ * confirm this is the intended reading of the Arm by-element form.
+ */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4_t
+simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane)
+    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
+  simde_float32x4_t result;
+  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \
+      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0))
+    /* The intrinsic needs a literal lane, so dispatch over the valid values. */
+    SIMDE_CONSTIFY_2_(vcmlaq_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b);
+  #else
+    simde_float32x4_private
+      r_ = simde_float32x4_to_private(r),
+      a_ = simde_float32x4_to_private(a),
+      b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane]));
+
+    #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760)
+      /* Copy each even element of a into the odd slot next to it. */
+      a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2);
+      /* Indices 0..3 all come from the first operand, i.e. the negated b. */
+      b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3);
+      r_.values += b_.values * a_.values;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) {
+        r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i];
+        r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i];
+      }
+    #endif
+
+    result = simde_float32x4_from_private(r_);
+  #endif
+
+  /* Shared by both branches: previously the native branch had no return. */
+  return result;
+}
+#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
+  #undef vcmlaq_rot180_laneq_f32
+  #define vcmlaq_rot180_laneq_f32(r, a, b, lane) simde_vcmlaq_rot180_laneq_f32(r, a, b, lane)
+#endif
+
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */
\ No newline at end of file
diff --git a/test/arm/neon/cmla_rot180_lane.c b/test/arm/neon/cmla_rot180_lane.c
new file mode 100644
index 000000000..b24d61ca4
--- /dev/null
+++ b/test/arm/neon/cmla_rot180_lane.c
@@ -0,0 +1,766 @@
+#define SIMDE_TEST_ARM_NEON_INSN cmla_rot180_lane
+
+#include "test-neon.h"
+#include "../../../simde/arm/neon/cmla_rot180_lane.h"
+#include "../../../simde/arm/neon/dup_n.h"
+
+static int
+test_simde_vcmla_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) {
+#if 1
+  static const struct {
+    simde_float16_t r_[4];
+    simde_float16_t a[4];
+    simde_float16_t b[4];
+    const int lane;
+    simde_float16_t r[4];
+  } test_vec[] = {
+    {
+      {
SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 7624.00), SIMDE_FLOAT16_VALUE( 7564.00), SIMDE_FLOAT16_VALUE( 9160.00), SIMDE_FLOAT16_VALUE( 9224.00) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-17248.00), SIMDE_FLOAT16_VALUE(-17872.00), SIMDE_FLOAT16_VALUE( 8064.00), SIMDE_FLOAT16_VALUE( 9344.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 8448.00), SIMDE_FLOAT16_VALUE( 8232.00), SIMDE_FLOAT16_VALUE( -7296.00), SIMDE_FLOAT16_VALUE( -8968.00) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { 
SIMDE_FLOAT16_VALUE( 37728.00), SIMDE_FLOAT16_VALUE( 37440.00), SIMDE_FLOAT16_VALUE(-23856.00), SIMDE_FLOAT16_VALUE(-23904.00) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 15600.00), SIMDE_FLOAT16_VALUE( 15432.00), SIMDE_FLOAT16_VALUE( 22880.00), SIMDE_FLOAT16_VALUE( 22992.00) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -305.50), SIMDE_FLOAT16_VALUE( -349.50), SIMDE_FLOAT16_VALUE(-48992.00), SIMDE_FLOAT16_VALUE(-48672.00) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 35936.00), SIMDE_FLOAT16_VALUE( 35488.00), SIMDE_FLOAT16_VALUE( 10008.00), SIMDE_FLOAT16_VALUE( 9968.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { 
SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 16880.00), SIMDE_FLOAT16_VALUE( 17040.00), SIMDE_FLOAT16_VALUE( 14712.00), SIMDE_FLOAT16_VALUE( 14832.00) } } + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + + // simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmla_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + simde_float32_t b[2]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 
83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-554711.687500), SIMDE_FLOAT32_C(-554377.000000) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(5756.336914), SIMDE_FLOAT32_C(7296.427246) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-266318.062500), SIMDE_FLOAT32_C(-267662.781250) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(170191.187500), SIMDE_FLOAT32_C(168918.281250) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-22939.761719), SIMDE_FLOAT32_C(-23948.582031) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-41678.914062), SIMDE_FLOAT32_C(-40308.664062) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(345016.781250), SIMDE_FLOAT32_C(345564.468750) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( 
-459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(119264.132812), SIMDE_FLOAT32_C(120072.914062) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, 0); + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_float32x2_t r; + // SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + + +static int +test_simde_vcmla_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[4]; + simde_float16_t a[4]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), 
SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 8896.00), SIMDE_FLOAT16_VALUE( 8856.00), SIMDE_FLOAT16_VALUE( -5340.00), SIMDE_FLOAT16_VALUE( -4416.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 498.75), SIMDE_FLOAT16_VALUE( 91.62), SIMDE_FLOAT16_VALUE( -3892.00), SIMDE_FLOAT16_VALUE( -3032.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 4668.00), SIMDE_FLOAT16_VALUE( 4752.00), SIMDE_FLOAT16_VALUE( -9768.00), SIMDE_FLOAT16_VALUE(-10000.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), 
SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-12368.00), SIMDE_FLOAT16_VALUE(-12592.00), SIMDE_FLOAT16_VALUE( 11288.00), SIMDE_FLOAT16_VALUE( 11216.00) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-20688.00), SIMDE_FLOAT16_VALUE(-22160.00), SIMDE_FLOAT16_VALUE( 45600.00), SIMDE_FLOAT16_VALUE( 45920.00) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(-47488.00), SIMDE_FLOAT16_VALUE(-32832.00), SIMDE_FLOAT16_VALUE(-33184.00) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) 
}, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(-19344.00), SIMDE_FLOAT16_VALUE(-28640.00), SIMDE_FLOAT16_VALUE(-26928.00) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-55072.00), SIMDE_FLOAT16_VALUE(-55328.00), SIMDE_FLOAT16_VALUE( 17728.00), SIMDE_FLOAT16_VALUE( 16464.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + + // simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + + simde_float16x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot180_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 
1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmla_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(44024.207031), SIMDE_FLOAT32_C(44576.335938) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(6150.492188), SIMDE_FLOAT32_C(5876.352051) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-119101.335938), SIMDE_FLOAT32_C(-118439.445312) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { 
SIMDE_FLOAT32_C(20765.861328), SIMDE_FLOAT32_C(20299.740234) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(76234.859375), SIMDE_FLOAT32_C(75961.859375) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(47759.132812), SIMDE_FLOAT32_C(47776.171875) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(145529.937500), SIMDE_FLOAT32_C(145602.359375) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-8361.368164), SIMDE_FLOAT32_C(-8186.958496) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x2_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot180_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; 
+#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[4]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 44096.00), SIMDE_FLOAT16_VALUE( 44064.00), SIMDE_FLOAT16_VALUE(-30272.00), SIMDE_FLOAT16_VALUE(-30032.00), + SIMDE_FLOAT16_VALUE( 10048.00), SIMDE_FLOAT16_VALUE( 10600.00), SIMDE_FLOAT16_VALUE(-27472.00), SIMDE_FLOAT16_VALUE(-26736.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), 
SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -5168.00), SIMDE_FLOAT16_VALUE( -5008.00), SIMDE_FLOAT16_VALUE( 5108.00), SIMDE_FLOAT16_VALUE( 5228.00), + SIMDE_FLOAT16_VALUE(-32288.00), SIMDE_FLOAT16_VALUE(-32480.00), SIMDE_FLOAT16_VALUE(-44000.00), SIMDE_FLOAT16_VALUE(-43040.00) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-18432.00), SIMDE_FLOAT16_VALUE(-17616.00), SIMDE_FLOAT16_VALUE(-19232.00), SIMDE_FLOAT16_VALUE(-19712.00), + SIMDE_FLOAT16_VALUE( 19552.00), SIMDE_FLOAT16_VALUE( 19392.00), SIMDE_FLOAT16_VALUE( 23232.00), SIMDE_FLOAT16_VALUE( 24848.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( 
-563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-22752.00), SIMDE_FLOAT16_VALUE(-22192.00), SIMDE_FLOAT16_VALUE(-24896.00), SIMDE_FLOAT16_VALUE(-23488.00), + SIMDE_FLOAT16_VALUE( 10176.00), SIMDE_FLOAT16_VALUE( 10136.00), SIMDE_FLOAT16_VALUE( -4368.00), SIMDE_FLOAT16_VALUE( -4384.00) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 6048.00), SIMDE_FLOAT16_VALUE( 6248.00), SIMDE_FLOAT16_VALUE(-10736.00), SIMDE_FLOAT16_VALUE(-10136.00), + SIMDE_FLOAT16_VALUE( -6560.00), SIMDE_FLOAT16_VALUE( -7284.00), SIMDE_FLOAT16_VALUE( -6192.00), SIMDE_FLOAT16_VALUE( -7844.00) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + 
SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -1649.00), SIMDE_FLOAT16_VALUE( -964.50), SIMDE_FLOAT16_VALUE( -7952.00), SIMDE_FLOAT16_VALUE( -7544.00), + SIMDE_FLOAT16_VALUE( 2210.00), SIMDE_FLOAT16_VALUE( 2376.00), SIMDE_FLOAT16_VALUE( 1928.00), SIMDE_FLOAT16_VALUE( 1858.00) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 25584.00), SIMDE_FLOAT16_VALUE( 24592.00), SIMDE_FLOAT16_VALUE( 37472.00), SIMDE_FLOAT16_VALUE( 36160.00), + SIMDE_FLOAT16_VALUE( 7316.00), SIMDE_FLOAT16_VALUE( 6884.00), SIMDE_FLOAT16_VALUE(-33888.00), SIMDE_FLOAT16_VALUE(-35456.00) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 
21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) },
+      INT32_C( 1),
+      { SIMDE_FLOAT16_VALUE( 12760.00), SIMDE_FLOAT16_VALUE( 13128.00), SIMDE_FLOAT16_VALUE( 13552.00), SIMDE_FLOAT16_VALUE( 14016.00),
+        SIMDE_FLOAT16_VALUE( 6796.00), SIMDE_FLOAT16_VALUE( 7348.00), SIMDE_FLOAT16_VALUE( 6552.00), SIMDE_FLOAT16_VALUE( 5488.00) } }
+
+  };
+
+  // Check every stored vector: load r_/a/b, run the function with the
+  // recorded lane (dispatched as a constant) and compare against r.
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
+    simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_);
+    simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a);
+    simde_float16x4_t b = simde_vld1_f16(test_vec[i].b);
+    simde_float16x8_t r;
+    SIMDE_CONSTIFY_2_(simde_vcmlaq_rot180_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b);
+
+    simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1);
+    // simde_float16x8_t r = simde_vcmlaq_rot180_lane_f16(r_, a, b, test_vec[i].lane);
+    // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+
+  return 0;
+#else
+  // Vector generator (disabled by the "#if 1" above): prints random inputs
+  // together with the result of the function under test.
+  fputc('\n', stdout);
+  const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 };
+  for (int i = 0 ; i < 8 ; i++) {
+    simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f);
+    simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f);
+    simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f);
+    // Must be the rot180 variant; the non-rotated simde_vcmlaq_lane_f16
+    // would emit vectors for a different operation.
+    simde_float16x8_t r = simde_vcmlaq_rot180_lane_f16(r_, a, b, lanes[i]);
+
+
+    simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST);
+    simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE);
+    simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+  return 1;
+#endif
+}
+
+static int
+test_simde_vcmlaq_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) {
+#if 1
+  static const struct {
+    simde_float32_t r_[4];
+    simde_float32_t a[4];
+    simde_float32_t b[2];
+
const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(250351.875000), SIMDE_FLOAT32_C(250540.937500), SIMDE_FLOAT32_C(-641627.000000), SIMDE_FLOAT32_C(-641620.500000) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(78247.265625), SIMDE_FLOAT32_C(78436.890625), SIMDE_FLOAT32_C(107704.531250), SIMDE_FLOAT32_C(106755.757812) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(255449.343750), SIMDE_FLOAT32_C(255087.765625), SIMDE_FLOAT32_C(-542620.625000), SIMDE_FLOAT32_C(-543163.250000) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(64836.050781), SIMDE_FLOAT32_C(64934.960938), SIMDE_FLOAT32_C(-93544.031250), SIMDE_FLOAT32_C(-94298.843750) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), 
SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-15087.788086), SIMDE_FLOAT32_C(-16904.587891), SIMDE_FLOAT32_C(-8463.912109), SIMDE_FLOAT32_C(-8664.532227) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-94916.125000), SIMDE_FLOAT32_C(-93724.765625), SIMDE_FLOAT32_C(241852.218750), SIMDE_FLOAT32_C(241671.671875) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-138462.234375), SIMDE_FLOAT32_C(-137421.078125), SIMDE_FLOAT32_C(218469.250000), SIMDE_FLOAT32_C(219506.703125) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(137911.859375), SIMDE_FLOAT32_C(139323.281250), SIMDE_FLOAT32_C(32517.996094), SIMDE_FLOAT32_C(33383.957031) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x4_t r = simde_vcmlaq_rot180_lane_f32(r_, a, b, 0); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x4(2, r, 
SIMDE_TEST_VEC_POS_LAST); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-58464.00), SIMDE_FLOAT16_VALUE(-57792.00), SIMDE_FLOAT16_VALUE( -3786.00), SIMDE_FLOAT16_VALUE( -4900.00), + SIMDE_FLOAT16_VALUE( 
-7016.00), SIMDE_FLOAT16_VALUE( -6296.00), SIMDE_FLOAT16_VALUE( 20384.00), SIMDE_FLOAT16_VALUE( 21440.00) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 12328.00), SIMDE_FLOAT16_VALUE( 12344.00), SIMDE_FLOAT16_VALUE( 17360.00), SIMDE_FLOAT16_VALUE( 15872.00), + SIMDE_FLOAT16_VALUE( -6872.00), SIMDE_FLOAT16_VALUE( -6292.00), SIMDE_FLOAT16_VALUE(-39136.00), SIMDE_FLOAT16_VALUE(-39072.00) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 14768.00), SIMDE_FLOAT16_VALUE( 15392.00), 
SIMDE_FLOAT16_VALUE(-29696.00), SIMDE_FLOAT16_VALUE(-29968.00), + SIMDE_FLOAT16_VALUE( 25824.00), SIMDE_FLOAT16_VALUE( 26768.00), SIMDE_FLOAT16_VALUE( -6152.00), SIMDE_FLOAT16_VALUE( -7276.00) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-31696.00), SIMDE_FLOAT16_VALUE(-31104.00), SIMDE_FLOAT16_VALUE( 25248.00), SIMDE_FLOAT16_VALUE( 25184.00), + SIMDE_FLOAT16_VALUE( 11056.00), SIMDE_FLOAT16_VALUE( 12648.00), SIMDE_FLOAT16_VALUE(-17920.00), SIMDE_FLOAT16_VALUE(-18672.00) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + 
INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -1321.00), SIMDE_FLOAT16_VALUE( -2526.00), SIMDE_FLOAT16_VALUE(-17408.00), SIMDE_FLOAT16_VALUE(-18400.00), + SIMDE_FLOAT16_VALUE( -3076.00), SIMDE_FLOAT16_VALUE( -4304.00), SIMDE_FLOAT16_VALUE( 12480.00), SIMDE_FLOAT16_VALUE( 12248.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 6300.00), SIMDE_FLOAT16_VALUE( 7084.00), SIMDE_FLOAT16_VALUE(-11088.00), SIMDE_FLOAT16_VALUE(-11928.00), + SIMDE_FLOAT16_VALUE( -472.50), SIMDE_FLOAT16_VALUE( 861.50), SIMDE_FLOAT16_VALUE( 12688.00), SIMDE_FLOAT16_VALUE( 13272.00) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( 
-311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 20064.00), SIMDE_FLOAT16_VALUE( 20416.00), SIMDE_FLOAT16_VALUE( -9352.00), SIMDE_FLOAT16_VALUE( -9184.00), + SIMDE_FLOAT16_VALUE( 8368.00), SIMDE_FLOAT16_VALUE( 8928.00), SIMDE_FLOAT16_VALUE(-17744.00), SIMDE_FLOAT16_VALUE(-17952.00) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE( 14104.00), SIMDE_FLOAT16_VALUE( 14224.00), SIMDE_FLOAT16_VALUE( 19424.00), SIMDE_FLOAT16_VALUE( 19008.00), + SIMDE_FLOAT16_VALUE(-38688.00), SIMDE_FLOAT16_VALUE(-39520.00), SIMDE_FLOAT16_VALUE( 23344.00), SIMDE_FLOAT16_VALUE( 24224.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + + // simde_float16x8_t r = simde_vcmlaq_rot180_laneq_f16(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + + simde_float16x8_t r; + SIMDE_CONSTIFY_4_(simde_vcmlaq_rot180_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + 
simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[4]; + simde_float32_t a[4]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-431133.437500), SIMDE_FLOAT32_C(-431318.968750), SIMDE_FLOAT32_C(-239370.468750), SIMDE_FLOAT32_C(-239954.531250) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { 
SIMDE_FLOAT32_C(185636.390625), SIMDE_FLOAT32_C(184797.000000), SIMDE_FLOAT32_C(1511.212036), SIMDE_FLOAT32_C(1004.312012) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(241976.421875), SIMDE_FLOAT32_C(241236.812500), SIMDE_FLOAT32_C(-187645.218750), SIMDE_FLOAT32_C(-187960.656250) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(487437.437500), SIMDE_FLOAT32_C(485927.312500), SIMDE_FLOAT32_C(586838.562500), SIMDE_FLOAT32_C(587489.312500) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(328442.062500), SIMDE_FLOAT32_C(327982.156250), SIMDE_FLOAT32_C(-164147.734375), SIMDE_FLOAT32_C(-164391.093750) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-710996.062500), 
SIMDE_FLOAT32_C(-710441.750000), SIMDE_FLOAT32_C(-489586.718750), SIMDE_FLOAT32_C(-489546.875000) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-791530.062500), SIMDE_FLOAT32_C(-792788.687500), SIMDE_FLOAT32_C(191966.765625), SIMDE_FLOAT32_C(191790.843750) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(153288.531250), SIMDE_FLOAT32_C(154205.937500), SIMDE_FLOAT32_C(158087.750000), SIMDE_FLOAT32_C(159300.171875) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot180_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + + // simde_float32x4_t r = simde_vcmlaq_rot180_laneq_f32(r_, a, b, test_vec[i].lane); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = 
simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_rot180_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot180_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot180_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot180_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot180_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot180_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot180_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot180_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot180_laneq_f32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" \ No newline at end of file From 560be04f6364b2167a6b06a2ba86139ecfd2f088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 17:40:13 +0800 Subject: [PATCH 06/29] [Neon] Add vcadd_rot270_f{16/32} and vcaddq_rot270_f{16/32/64} --- simde/arm/neon/cmla_rot270_lane.h | 325 +++++++++++++ test/arm/neon/cmla_rot270_lane.c | 754 ++++++++++++++++++++++++++++++ 2 files changed, 1079 insertions(+) create mode 100644 simde/arm/neon/cmla_rot270_lane.h create mode 100644 test/arm/neon/cmla_rot270_lane.c diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h new file mode 100644 index 000000000..08d8de172 --- /dev/null +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -0,0 +1,325 @@ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files 
(the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2023 Chi-Wei Chu +*/ + +#if !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) +#define SIMDE_ARM_NEON_CMLA_ROT270_LANE_H + +#include "types.h" +#include "dup_lane.h" +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + + +/* vcmla_rot270_lane_f16: accumulate into r the products of a with one selected element of b. + * Portable path: the scalar b[lane] is broadcast to all four elements, then for each pair i + * r[2i] += b[2i+1] * a[2i+1] and r[2i+1] += -b[2i] * a[2i+1]. + * lane must be a compile-time constant in [0, 1]. + * NOTE(review): lane selects a single scalar of b here, not a two-element pair -- confirm against the Arm definition of the intrinsic. + */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float16x4_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmla_rot270_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #else + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && 
!defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + result = simde_float16x4_from_private(r_); + #endif + + /* Returned after the #endif so the native branch, which only assigns result, also returns it. */ + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot270_lane_f16 + #define vcmla_rot270_lane_f16(r, a, b, lane) simde_vcmla_rot270_lane_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmla_rot270_lane_f32(r, a, b, 0); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot270_lane_f32 + #define vcmla_rot270_lane_f32(r, a, b, lane) simde_vcmla_rot270_lane_f32(r, a, b, lane) +#endif + +/* vcmlaq_rot270_lane_f16: accumulate into r the products of a with one selected element of b. + * Portable path: the scalar b[lane] is broadcast to all eight elements, then for each pair i + * r[2i] += b[2i+1] * a[2i+1] and r[2i+1] += -b[2i] * a[2i+1]. + * lane must be a compile-time constant in [0, 1]. + * NOTE(review): lane selects a single scalar of b here, not a two-element pair -- confirm against the Arm definition of the intrinsic. + */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float16x8_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot270_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #else + simde_float16x8_private + r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + result = simde_float16x8_from_private(r_); + #endif + + /* Returned after the #endif so the native branch, which only assigns result, also returns it. */ + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot270_lane_f16 + #define vcmlaq_rot270_lane_f16(r, a, b, lane) simde_vcmlaq_rot270_lane_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot270_lane_f32(r, a, b, 0); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot270_lane_f32 + #define vcmlaq_rot270_lane_f32(r, a, b, lane) simde_vcmlaq_rot270_lane_f32(r, a, b, lane) +#endif + +/* vcmla_rot270_laneq_f16: accumulate into r the products of a with one selected element of the eight-element b. + * Portable path: the scalar b[lane] is broadcast to all four elements, then for each pair i + * r[2i] += b[2i+1] * a[2i+1] and r[2i+1] += -b[2i] * a[2i+1]. + * lane must be a compile-time constant in [0, 3]. + * NOTE(review): 0..3 follows the Arm intrinsic range for a float16x8_t b -- confirm against the Arm reference. + * NOTE(review): lane selects a single scalar of b here, not a two-element pair -- confirm against the Arm definition of the intrinsic. + */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x4_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_4_(vcmla_rot270_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #else + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); + + 
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + result = simde_float16x4_from_private(r_); + #endif + + /* Returned after the #endif so the native branch, which only assigns result, also returns it. */ + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot270_laneq_f16 + #define vcmla_rot270_laneq_f16(r, a, b, lane) simde_vcmla_rot270_laneq_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] 
+= -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + result = simde_float32x2_from_private(r_); + #endif + return result; /* placed after #endif so the native branch also returns a value */ +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot270_laneq_f32 + #define vcmla_rot270_laneq_f32(r, a, b, lane) simde_vcmla_rot270_laneq_f32(r, a, b, lane) +#endif +/* Multiply-accumulate with a 270-degree rotation against one element of the 128-bit vector b: b[lane] is broadcast to every position, then for each (even, odd) element pair r[even] += b[lane] * a[odd] and r[odd] += -b[lane] * a[odd]. lane must be a compile-time constant in 0..3. Returns the updated accumulator. NOTE(review): a single scalar of b is broadcast rather than a (real, imaginary) pair; confirm against the Arm FCMLA (by element) definition. */ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x8_t result; + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_4_(vcmlaq_rot270_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); /* dispatch to the f16 intrinsic matching the float16x8_t operands */ + #else + simde_float16x8_private + r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); /* second argument is the vector size in bytes (8 x 16-bit = 16), as in the f32 variants */ + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); /* yields (b1, -b0, b3, -b2, ...) */ + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + result = simde_float16x8_from_private(r_); + #endif + return result; /* placed after #endif so the native branch also returns a value */ +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot270_laneq_f16 + #define vcmlaq_rot270_laneq_f16(r, a, b, lane) simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t 
+simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_t result; /* Accumulator returned to the caller. b[lane] (lane is a compile-time constant, 0 or 1) is broadcast to every position, then for each (even, odd) element pair r[even] += b[lane] * a[odd] and r[odd] += -b[lane] * a[odd]. NOTE(review): a single scalar of b is broadcast rather than a (real, imaginary) pair; confirm against the Arm FCMLA (by element) definition. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); /* duplicate the odd elements of a */ + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); /* yields (b1, -b0, b3, -b2) */ + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + result = simde_float32x4_from_private(r_); + #endif + return result; /* placed after #endif so the native branch also returns a value */ +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot270_laneq_f32 + #define vcmlaq_rot270_laneq_f32(r, a, b, lane) simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ \ No newline at end of file diff --git a/test/arm/neon/cmla_rot270_lane.c b/test/arm/neon/cmla_rot270_lane.c new file mode 100644 index 000000000..78027a830 --- /dev/null +++ b/test/arm/neon/cmla_rot270_lane.c @@ -0,0 +1,754 @@ +#define SIMDE_TEST_ARM_NEON_INSN cmla_rot270_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/cmla_rot270_lane.h" +#include 
"../../../simde/arm/neon/dup_n.h" + +static int +test_simde_vcmla_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[4]; + simde_float16_t a[4]; + simde_float16_t b[4]; + const int lane; + simde_float16_t r[4]; + } test_vec[] = { + { + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 389.25), SIMDE_FLOAT16_VALUE( -547.50), SIMDE_FLOAT16_VALUE( -1965.00), SIMDE_FLOAT16_VALUE( 771.00) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -133.12), SIMDE_FLOAT16_VALUE( 966.00), SIMDE_FLOAT16_VALUE( 43968.00), SIMDE_FLOAT16_VALUE(-43456.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 58720.00), SIMDE_FLOAT16_VALUE(-59360.00), SIMDE_FLOAT16_VALUE(-27488.00), SIMDE_FLOAT16_VALUE( 27264.00) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { 
SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 30992.00), SIMDE_FLOAT16_VALUE(-31104.00), SIMDE_FLOAT16_VALUE(-18160.00), SIMDE_FLOAT16_VALUE( 17840.00) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 16024.00), SIMDE_FLOAT16_VALUE(-15864.00), SIMDE_FLOAT16_VALUE( 42528.00), SIMDE_FLOAT16_VALUE(-42528.00) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -1876.00), SIMDE_FLOAT16_VALUE( 1769.00), SIMDE_FLOAT16_VALUE( 46432.00), SIMDE_FLOAT16_VALUE(-46208.00) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 15128.00), SIMDE_FLOAT16_VALUE(-15168.00), SIMDE_FLOAT16_VALUE( 1695.00), SIMDE_FLOAT16_VALUE( -1617.00) } }, + { { 
SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-14904.00), SIMDE_FLOAT16_VALUE( 13320.00), SIMDE_FLOAT16_VALUE( 12312.00), SIMDE_FLOAT16_VALUE(-11272.00) } } + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot270_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_rot270_lane_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmla_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + 
simde_float32_t b[2]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-526088.812500), SIMDE_FLOAT32_C(526589.937500) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-50512.480469), SIMDE_FLOAT32_C(50272.230469) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-448612.062500), SIMDE_FLOAT32_C(448311.968750) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(56657.046875), SIMDE_FLOAT32_C(-56971.597656) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-12129.276367), SIMDE_FLOAT32_C(11783.416992) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(31953.617188), SIMDE_FLOAT32_C(-31670.166016) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(311662.375000), SIMDE_FLOAT32_C(-312512.375000) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + 
{ SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(180180.687500), SIMDE_FLOAT32_C(-180607.796875) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x2_t r = simde_vcmla_rot270_lane_f32(r_, a, b, 0); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_rot270_lane_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + + +static int +test_simde_vcmla_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[4]; + simde_float16_t a[4]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { 
SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 12392.00), SIMDE_FLOAT16_VALUE(-10800.00), SIMDE_FLOAT16_VALUE(-23712.00), SIMDE_FLOAT16_VALUE( 23888.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 5536.00), SIMDE_FLOAT16_VALUE( -5360.00), SIMDE_FLOAT16_VALUE( 2150.00), SIMDE_FLOAT16_VALUE( -1565.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -5512.00), SIMDE_FLOAT16_VALUE( 4924.00), SIMDE_FLOAT16_VALUE( 7552.00), SIMDE_FLOAT16_VALUE( -7336.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), 
SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -5864.00), SIMDE_FLOAT16_VALUE( 5220.00), SIMDE_FLOAT16_VALUE( 39616.00), SIMDE_FLOAT16_VALUE(-39648.00) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -8328.00), SIMDE_FLOAT16_VALUE( 7940.00), SIMDE_FLOAT16_VALUE( 5928.00), SIMDE_FLOAT16_VALUE( -5572.00) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-15368.00), SIMDE_FLOAT16_VALUE( 16144.00), SIMDE_FLOAT16_VALUE(-27904.00), SIMDE_FLOAT16_VALUE( 27440.00) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), 
SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-25872.00), SIMDE_FLOAT16_VALUE( 26368.00), SIMDE_FLOAT16_VALUE(-21152.00), SIMDE_FLOAT16_VALUE( 21104.00) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-17792.00), SIMDE_FLOAT16_VALUE( 18816.00), SIMDE_FLOAT16_VALUE( 9000.00), SIMDE_FLOAT16_VALUE( -9312.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + + simde_float16x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot270_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x4_t r = simde_vcmla_rot270_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x4(2, r_, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmla_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[2]; + simde_float32_t a[2]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(610637.625000), SIMDE_FLOAT32_C(-611026.000000) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-6366.252441), SIMDE_FLOAT32_C(5428.512207) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-36916.816406), SIMDE_FLOAT32_C(37517.988281) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-235786.640625), SIMDE_FLOAT32_C(237312.234375) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 
813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(105068.734375), SIMDE_FLOAT32_C(-104790.312500) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(90468.750000), SIMDE_FLOAT32_C(-89659.671875) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-417438.093750), SIMDE_FLOAT32_C(417206.593750) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-33583.660156), SIMDE_FLOAT32_C(33966.328125) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x2_t r; + SIMDE_CONSTIFY_2_(simde_vcmla_rot270_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t b = 
simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vcmla_rot270_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[4]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 32992.00), SIMDE_FLOAT16_VALUE(-32592.00), SIMDE_FLOAT16_VALUE( -9224.00), SIMDE_FLOAT16_VALUE( 9824.00), + SIMDE_FLOAT16_VALUE( 15512.00), SIMDE_FLOAT16_VALUE(-16368.00), SIMDE_FLOAT16_VALUE( 22288.00), SIMDE_FLOAT16_VALUE(-22320.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( 
-52.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -4448.00), SIMDE_FLOAT16_VALUE( 2868.00), SIMDE_FLOAT16_VALUE( 7244.00), SIMDE_FLOAT16_VALUE( -6208.00), + SIMDE_FLOAT16_VALUE( 28208.00), SIMDE_FLOAT16_VALUE(-29968.00), SIMDE_FLOAT16_VALUE( 3320.00), SIMDE_FLOAT16_VALUE( -3522.00) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 19280.00), SIMDE_FLOAT16_VALUE(-20416.00), SIMDE_FLOAT16_VALUE( 13656.00), SIMDE_FLOAT16_VALUE(-14888.00), + SIMDE_FLOAT16_VALUE( -9592.00), SIMDE_FLOAT16_VALUE( 11080.00), SIMDE_FLOAT16_VALUE( 7384.00), SIMDE_FLOAT16_VALUE( -7208.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { 
SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-12696.00), SIMDE_FLOAT16_VALUE( 12008.00), SIMDE_FLOAT16_VALUE(-14984.00), SIMDE_FLOAT16_VALUE( 15200.00), + SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE( 21360.00), SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE( 29168.00) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -2078.00), SIMDE_FLOAT16_VALUE( 1231.00), SIMDE_FLOAT16_VALUE( 8512.00), SIMDE_FLOAT16_VALUE( -9688.00), + SIMDE_FLOAT16_VALUE( -6960.00), SIMDE_FLOAT16_VALUE( 6000.00), SIMDE_FLOAT16_VALUE( 3292.00), SIMDE_FLOAT16_VALUE( -3614.00) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -311.50), 
SIMDE_FLOAT16_VALUE( 117.25), SIMDE_FLOAT16_VALUE( -1865.00), SIMDE_FLOAT16_VALUE( 2496.00), + SIMDE_FLOAT16_VALUE( -7524.00), SIMDE_FLOAT16_VALUE( 7860.00), SIMDE_FLOAT16_VALUE( -1605.00), SIMDE_FLOAT16_VALUE( 174.75) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE( 29440.00), SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE( 47168.00), + SIMDE_FLOAT16_VALUE( 16880.00), SIMDE_FLOAT16_VALUE(-16944.00), SIMDE_FLOAT16_VALUE( 6696.00), SIMDE_FLOAT16_VALUE( -6416.00) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 12864.00), SIMDE_FLOAT16_VALUE(-13944.00), SIMDE_FLOAT16_VALUE( -6912.00), SIMDE_FLOAT16_VALUE( 7908.00), + SIMDE_FLOAT16_VALUE(-13744.00), SIMDE_FLOAT16_VALUE( 13360.00), 
SIMDE_FLOAT16_VALUE( -5964.00), SIMDE_FLOAT16_VALUE( 5044.00) } } + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x8_t r; + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot270_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[4]; + simde_float32_t a[4]; + simde_float32_t b[2]; + const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-750463.375000), SIMDE_FLOAT32_C(751255.687500), 
SIMDE_FLOAT32_C(193276.718750), SIMDE_FLOAT32_C(-194759.546875) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(1763.281738), SIMDE_FLOAT32_C(-1649.681763), SIMDE_FLOAT32_C(-90054.617188), SIMDE_FLOAT32_C(89508.742188) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-682739.875000), SIMDE_FLOAT32_C(683590.000000), SIMDE_FLOAT32_C(561822.437500), SIMDE_FLOAT32_C(-563218.187500) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(110646.710938), SIMDE_FLOAT32_C(-111760.718750), SIMDE_FLOAT32_C(-120219.195312), SIMDE_FLOAT32_C(119327.898438) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-14330.849609), SIMDE_FLOAT32_C(14417.729492), SIMDE_FLOAT32_C(10193.823242), SIMDE_FLOAT32_C(-8453.263672) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( 
-333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(698546.187500), SIMDE_FLOAT32_C(-698628.187500), SIMDE_FLOAT32_C(50247.367188), SIMDE_FLOAT32_C(-51198.714844) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-78157.531250), SIMDE_FLOAT32_C(78774.859375), SIMDE_FLOAT32_C(4187.824707), SIMDE_FLOAT32_C(-4100.624512) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(351994.031250), SIMDE_FLOAT32_C(-352372.625000), SIMDE_FLOAT32_C(-347034.812500), SIMDE_FLOAT32_C(347561.687500) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + simde_float32x4_t r = simde_vcmlaq_rot270_lane_f32(r_, a, b, 0); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, 
b, lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t r_[8]; + simde_float16_t a[8]; + simde_float16_t b[8]; + const int lane; + simde_float16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 31024.00), SIMDE_FLOAT16_VALUE(-30432.00), SIMDE_FLOAT16_VALUE( 56384.00), SIMDE_FLOAT16_VALUE(-55808.00), + SIMDE_FLOAT16_VALUE(-14528.00), SIMDE_FLOAT16_VALUE( 15520.00), SIMDE_FLOAT16_VALUE( 14728.00), SIMDE_FLOAT16_VALUE(-15344.00) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( 
-214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -5848.00), SIMDE_FLOAT16_VALUE( 6660.00), SIMDE_FLOAT16_VALUE(-47776.00), SIMDE_FLOAT16_VALUE( 47296.00), + SIMDE_FLOAT16_VALUE( 5920.00), SIMDE_FLOAT16_VALUE( -6688.00), SIMDE_FLOAT16_VALUE( -7976.00), SIMDE_FLOAT16_VALUE( 7160.00) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 1708.00), SIMDE_FLOAT16_VALUE( -2544.00), SIMDE_FLOAT16_VALUE( -9592.00), SIMDE_FLOAT16_VALUE( 9160.00), + SIMDE_FLOAT16_VALUE( 13560.00), SIMDE_FLOAT16_VALUE(-13088.00), SIMDE_FLOAT16_VALUE( 9080.00), SIMDE_FLOAT16_VALUE( -8376.00) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { 
SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE( 25952.00), SIMDE_FLOAT16_VALUE(-27136.00), SIMDE_FLOAT16_VALUE( 19760.00), SIMDE_FLOAT16_VALUE(-21008.00), + SIMDE_FLOAT16_VALUE( 19408.00), SIMDE_FLOAT16_VALUE(-19648.00), SIMDE_FLOAT16_VALUE(-22720.00), SIMDE_FLOAT16_VALUE( 23504.00) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 20016.00), SIMDE_FLOAT16_VALUE(-20176.00), SIMDE_FLOAT16_VALUE(-16928.00), SIMDE_FLOAT16_VALUE( 16912.00), + SIMDE_FLOAT16_VALUE( -8096.00), SIMDE_FLOAT16_VALUE( 8696.00), SIMDE_FLOAT16_VALUE( 15264.00), SIMDE_FLOAT16_VALUE(-16296.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 
730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 7588.00), SIMDE_FLOAT16_VALUE( -8040.00), SIMDE_FLOAT16_VALUE( 5176.00), SIMDE_FLOAT16_VALUE( -5404.00), + SIMDE_FLOAT16_VALUE(-10008.00), SIMDE_FLOAT16_VALUE( 10136.00), SIMDE_FLOAT16_VALUE( 5884.00), SIMDE_FLOAT16_VALUE( -5212.00) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE(-20272.00), SIMDE_FLOAT16_VALUE( 18976.00), SIMDE_FLOAT16_VALUE( -9352.00), SIMDE_FLOAT16_VALUE( 8456.00), + SIMDE_FLOAT16_VALUE( 20848.00), SIMDE_FLOAT16_VALUE(-20016.00), SIMDE_FLOAT16_VALUE(-21536.00), SIMDE_FLOAT16_VALUE( 22720.00) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), 
SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-32080.00), SIMDE_FLOAT16_VALUE( 32624.00), SIMDE_FLOAT16_VALUE(-20320.00), SIMDE_FLOAT16_VALUE( 20560.00), + SIMDE_FLOAT16_VALUE(-34272.00), SIMDE_FLOAT16_VALUE( 33824.00), SIMDE_FLOAT16_VALUE(-43872.00), SIMDE_FLOAT16_VALUE( 44544.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + + simde_float16x8_t r; + SIMDE_CONSTIFY_4_(simde_vcmlaq_rot270_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, a, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcmlaq_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32_t r_[4]; + simde_float32_t a[4]; + simde_float32_t b[4]; + const int lane; + simde_float32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(839681.750000), SIMDE_FLOAT32_C(-839156.937500), SIMDE_FLOAT32_C(782495.375000), SIMDE_FLOAT32_C(-782845.687500) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(82345.062500), SIMDE_FLOAT32_C(-83299.804688), SIMDE_FLOAT32_C(105892.781250), SIMDE_FLOAT32_C(-106161.718750) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(271323.000000), SIMDE_FLOAT32_C(-272501.656250), SIMDE_FLOAT32_C(-439964.968750), SIMDE_FLOAT32_C(441537.375000) } }, + { { SIMDE_FLOAT32_C( 
783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(736097.062500), SIMDE_FLOAT32_C(-736041.000000), SIMDE_FLOAT32_C(-755055.625000), SIMDE_FLOAT32_C(754533.500000) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-452097.250000), SIMDE_FLOAT32_C(453633.968750), SIMDE_FLOAT32_C(338006.656250), SIMDE_FLOAT32_C(-338632.281250) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-590092.937500), SIMDE_FLOAT32_C(590752.500000), SIMDE_FLOAT32_C(-167685.140625), SIMDE_FLOAT32_C(165936.531250) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-241310.093750), SIMDE_FLOAT32_C(241164.921875), SIMDE_FLOAT32_C(25459.757812), SIMDE_FLOAT32_C(-26348.677734) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), 
SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-126452.203125), SIMDE_FLOAT32_C(126796.023438), SIMDE_FLOAT32_C(-140046.640625), SIMDE_FLOAT32_C(139280.625000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + simde_float32x4_t r; + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot270_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + + } + + return 0; +#else + fputc('\n', stdout); + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vcmlaq_rot270_laneq_f32(r_, a, b, lanes[i]); + + + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot270_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot270_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot270_laneq_f16) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vcmla_rot270_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot270_lane_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot270_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot270_laneq_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot270_laneq_f32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" \ No newline at end of file From 1818df3349bdd5cbb04de3f9882eaff4342ea679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 18:15:31 +0800 Subject: [PATCH 07/29] [Neon] : add meson.build and simde/arm/neon.h --- meson.build | 6 ++++++ simde/arm/neon.h | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/meson.build b/meson.build index b537dbe50..152e02d15 100644 --- a/meson.build +++ b/meson.build @@ -25,6 +25,8 @@ simde_neon_families = [ 'bcax', 'bic', 'bsl', + 'cadd_rot270', + 'cadd_rot90', 'cage', 'cagt', 'ceq', @@ -40,6 +42,10 @@ simde_neon_families = [ 'cltz', 'clz', 'cmla', + 'cmla_lane', + 'cmla_rot180_lane', + 'cmla_rot270_lane', + 'cmla_rot90_lane', 'cmla_rot90', 'cmla_rot180', 'cmla_rot270', diff --git a/simde/arm/neon.h b/simde/arm/neon.h index 634abc122..eb71abe83 100644 --- a/simde/arm/neon.h +++ b/simde/arm/neon.h @@ -46,6 +46,8 @@ #include "neon/bcax.h" #include "neon/bic.h" #include "neon/bsl.h" +#include "neon/cadd_rot270.h" +#include "neon/cadd_rot90.h" #include "neon/cage.h" #include "neon/cagt.h" #include "neon/ceq.h" @@ -61,6 +63,10 @@ #include "neon/cltz.h" #include "neon/clz.h" #include "neon/cmla.h" +#include "neon/cmla_lane.h" +#include "neon/cmla_rot180_lane.h" +#include "neon/cmla_rot270_lane.h" +#include "neon/cmla_rot90_lane.h" #include "neon/cmla_rot90.h" #include "neon/cmla_rot180.h" #include "neon/cmla_rot270.h" From 6fbd8c22a56760d1c9ad59cd4e216842fcfd36cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 18:31:23 +0800 Subject: [PATCH 08/29] [Fix] : add newline --- simde/arm/neon/cadd_rot270.h | 2 +- 
simde/arm/neon/cadd_rot90.h | 2 +- simde/arm/neon/cmla_lane.h | 2 +- simde/arm/neon/cmla_rot180_lane.h | 2 +- simde/arm/neon/cmla_rot270_lane.h | 2 +- simde/arm/neon/cmla_rot90_lane.h | 2 +- test/arm/neon/cadd_rot270.c | 2 +- test/arm/neon/cadd_rot90.c | 2 +- test/arm/neon/cmla_lane.c | 2 +- test/arm/neon/cmla_rot180_lane.c | 2 +- test/arm/neon/cmla_rot270_lane.c | 2 +- test/arm/neon/cmla_rot90_lane.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 0ccbec6c0..a115b96c7 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -195,4 +195,4 @@ simde_vcaddq_rot270_f64(simde_float64x2_t a, simde_float64x2_t b) { SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index d43accd7b..5da5443be 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -195,4 +195,4 @@ simde_vcaddq_rot90_f64(simde_float64x2_t a, simde_float64x2_t b) { SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 36bf532e5..01452b8bc 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -313,4 +313,4 @@ simde_vcmlaq_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4 SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index f41fd8fd9..4ff5405d2 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -325,4 +325,4 @@ 
simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_fl SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 08d8de172..efa0fcdb4 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -322,4 +322,4 @@ simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_fl SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 86090922e..6ad289e3a 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -318,4 +318,4 @@ simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_flo SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */ diff --git a/test/arm/neon/cadd_rot270.c b/test/arm/neon/cadd_rot270.c index 70bbaafd0..62159c07e 100644 --- a/test/arm/neon/cadd_rot270.c +++ b/test/arm/neon/cadd_rot270.c @@ -326,4 +326,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f64) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include "test-neon-footer.h" diff --git a/test/arm/neon/cadd_rot90.c b/test/arm/neon/cadd_rot90.c index a9f7d5f13..9119a7ae7 100644 --- a/test/arm/neon/cadd_rot90.c +++ b/test/arm/neon/cadd_rot90.c @@ -326,4 +326,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot90_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot90_f64) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include 
"test-neon-footer.h" diff --git a/test/arm/neon/cmla_lane.c b/test/arm/neon/cmla_lane.c index 5ae9d6fba..eea427b8f 100644 --- a/test/arm/neon/cmla_lane.c +++ b/test/arm/neon/cmla_lane.c @@ -747,4 +747,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_laneq_f32) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include "test-neon-footer.h" diff --git a/test/arm/neon/cmla_rot180_lane.c b/test/arm/neon/cmla_rot180_lane.c index b24d61ca4..91f99ced2 100644 --- a/test/arm/neon/cmla_rot180_lane.c +++ b/test/arm/neon/cmla_rot180_lane.c @@ -763,4 +763,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot180_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot180_laneq_f32) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include "test-neon-footer.h" diff --git a/test/arm/neon/cmla_rot270_lane.c b/test/arm/neon/cmla_rot270_lane.c index 78027a830..deb0056bc 100644 --- a/test/arm/neon/cmla_rot270_lane.c +++ b/test/arm/neon/cmla_rot270_lane.c @@ -751,4 +751,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot270_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot270_laneq_f32) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include "test-neon-footer.h" diff --git a/test/arm/neon/cmla_rot90_lane.c b/test/arm/neon/cmla_rot90_lane.c index 076a2fc49..522fd12b1 100644 --- a/test/arm/neon/cmla_rot90_lane.c +++ b/test/arm/neon/cmla_rot90_lane.c @@ -759,4 +759,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot90_laneq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcmlaq_rot90_laneq_f32) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include "test-neon-footer.h" From 72f8a1c81be3a271f6095f003d36e28330b31c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 19:11:11 +0800 Subject: [PATCH 09/29] [Fix] : formatting the code --- simde/arm/neon/cadd_rot270.h | 316 ++++---- simde/arm/neon/cadd_rot90.h | 316 ++++---- 
simde/arm/neon/cmla_lane.h | 497 ++++++------ simde/arm/neon/cmla_rot180_lane.h | 546 +++++++------ simde/arm/neon/cmla_rot270_lane.h | 526 +++++++------ simde/arm/neon/cmla_rot90_lane.h | 524 +++++++------ test/arm/neon/cadd_rot270.c | 444 ++++++----- test/arm/neon/cadd_rot90.c | 445 ++++++----- test/arm/neon/cmla_lane.c | 1208 +++++++++++++++++----------- test/arm/neon/cmla_rot180_lane.c | 1220 +++++++++++++++++------------ test/arm/neon/cmla_rot270_lane.c | 1204 +++++++++++++++++----------- test/arm/neon/cmla_rot90_lane.c | 1215 +++++++++++++++++----------- 12 files changed, 4899 insertions(+), 3562 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index a115b96c7..1d0f1c833 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -1,28 +1,28 @@ /* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-* -* Copyright: -* 2023 Chi-Wei Chu -*/ + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Chi-Wei Chu + */ #if !defined(SIMDE_ARM_NEON_CADD_ROT270_H) #define SIMDE_ARM_NEON_CADD_ROT270_H @@ -33,166 +33,162 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcadd_rot270_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - - return simde_float16x4_from_private(r_); - #endif +simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, + simde_float16x4_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot270_f16(a, b); +#else + simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + 
r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } +#endif + + return simde_float16x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot270_f16 - #define vcadd_rot270_f16(a, b) simde_vcadd_rot270_f16(a, b) +#undef vcadd_rot270_f16 +#define vcadd_rot270_f16(a, b) simde_vcadd_rot270_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcaddq_rot270_f16(a, b); - #else - simde_float16x8_private - r_ , - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - - return simde_float16x8_from_private(r_); - #endif +simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, + simde_float16x8_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot270_f16(a, b); +#else + simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i 
= 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } +#endif + + return simde_float16x8_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot270_f16 - #define vcaddq_rot270_f16(a, b) simde_vcaddq_rot270_f16(a, b) +#undef vcaddq_rot270_f16 +#define vcaddq_rot270_f16(a, b) simde_vcaddq_rot270_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcadd_rot270_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcadd_rot270_f32(a, b); - #else - simde_float32x2_private - r_ , - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif +simde_float32x2_t simde_vcadd_rot270_f32(simde_float32x2_t a, + simde_float32x2_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot270_f32(a, b); +#else + simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && 
!defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } +#endif + + return simde_float32x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot270_f32 - #define vcadd_rot270_f32(a, b) simde_vcadd_rot270_f32(a, b) +#undef vcadd_rot270_f32 +#define vcadd_rot270_f32(a, b) simde_vcadd_rot270_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcaddq_rot270_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcaddq_rot270_f32(a, b); - #else - simde_float32x4_private - r_ , - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif +simde_float32x4_t simde_vcaddq_rot270_f32(simde_float32x4_t a, + simde_float32x4_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot270_f32(a, b); +#else + 
simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } +#endif + + return simde_float32x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot270_f32 - #define vcaddq_rot270_f32(a, b) simde_vcaddq_rot270_f32(a, b) +#undef vcaddq_rot270_f32 +#define vcaddq_rot270_f32(a, b) simde_vcaddq_rot270_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vcaddq_rot270_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcaddq_rot270_f64(a, b); - #else - simde_float64x2_private - r_ , - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif +simde_float64x2_t simde_vcaddq_rot270_f64(simde_float64x2_t a, + simde_float64x2_t b) { +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + 
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot270_f64(a, b); +#else + simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } +#endif + + return simde_float64x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot270_f64 - #define vcaddq_rot270_f64(a, b) simde_vcaddq_rot270_f64(a, b) +#undef vcaddq_rot270_f64 +#define vcaddq_rot270_f64(a, b) simde_vcaddq_rot270_f64(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ \ No newline at end of file diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 5da5443be..4391aa47a 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -1,28 +1,28 @@ /* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2023 Chi-Wei Chu -*/ + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Chi-Wei Chu + */ #if !defined(SIMDE_ARM_NEON_CADD_ROT90_H) #define SIMDE_ARM_NEON_CADD_ROT90_H @@ -33,166 +33,162 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcadd_rot90_f16(a, b); - #else - simde_float16x4_private - r_, - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - - return simde_float16x4_from_private(r_); - #endif +simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, + simde_float16x4_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot90_f16(a, b); +#else + simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + 
r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } +#endif + + return simde_float16x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot90_f16 - #define vcadd_rot90_f16(a, b) simde_vcadd_rot90_f16(a, b) +#undef vcadd_rot90_f16 +#define vcadd_rot90_f16(a, b) simde_vcadd_rot90_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcaddq_rot90_f16(simde_float16x8_t a, simde_float16x8_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcaddq_rot90_f16(a, b); - #else - simde_float16x8_private - r_ , - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - - return simde_float16x8_from_private(r_); - #endif +simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, + simde_float16x8_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot90_f16(a, b); +#else + simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < 
(sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } +#endif + + return simde_float16x8_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot90_f16 - #define vcaddq_rot90_f16(a, b) simde_vcaddq_rot90_f16(a, b) +#undef vcaddq_rot90_f16 +#define vcaddq_rot90_f16(a, b) simde_vcaddq_rot90_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcadd_rot90_f32(simde_float32x2_t a, simde_float32x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcadd_rot90_f32(a, b); - #else - simde_float32x2_private - r_ , - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - - return simde_float32x2_from_private(r_); - #endif +simde_float32x2_t simde_vcadd_rot90_f32(simde_float32x2_t a, + simde_float32x2_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot90_f32(a, b); +#else + simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values 
= SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } +#endif + + return simde_float32x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcadd_rot90_f32 - #define vcadd_rot90_f32(a, b) simde_vcadd_rot90_f32(a, b) +#undef vcadd_rot90_f32 +#define vcadd_rot90_f32(a, b) simde_vcadd_rot90_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcaddq_rot90_f32(simde_float32x4_t a, simde_float32x4_t b) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcaddq_rot90_f32(a, b); - #else - simde_float32x4_private - r_ , - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - - return simde_float32x4_from_private(r_); - #endif +simde_float32x4_t simde_vcaddq_rot90_f32(simde_float32x4_t a, + simde_float32x4_t b) { +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot90_f32(a, b); +#else + simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), + 
b_ = simde_float32x4_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } +#endif + + return simde_float32x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot90_f32 - #define vcaddq_rot90_f32(a, b) simde_vcaddq_rot90_f32(a, b) +#undef vcaddq_rot90_f32 +#define vcaddq_rot90_f32(a, b) simde_vcaddq_rot90_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t -simde_vcaddq_rot90_f64(simde_float64x2_t a, simde_float64x2_t b) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcaddq_rot90_f64(a, b); - #else - simde_float64x2_private - r_ , - a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } - #endif - - return simde_float64x2_from_private(r_); - #endif +simde_float64x2_t simde_vcaddq_rot90_f64(simde_float64x2_t a, + simde_float64x2_t b) { +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return 
vcaddq_rot90_f64(a, b); +#else + simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } +#endif + + return simde_float64x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcaddq_rot90_f64 - #define vcaddq_rot90_f64(a, b) simde_vcaddq_rot90_f64(a, b) +#undef vcaddq_rot90_f64 +#define vcaddq_rot90_f64(a, b) simde_vcaddq_rot90_f64(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ \ No newline at end of file diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 01452b8bc..ae9bc9b30 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -1,316 +1,333 @@ /* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2023 Chi-Wei Chu -*/ + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Chi-Wei Chu + */ #if !defined(SIMDE_ARM_NEON_CMLA_LANE_H) #define SIMDE_ARM_NEON_CMLA_LANE_H -#include "types.h" #include "dup_lane.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1){ +simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_lane_f16, result, (HEDLEY_UNREACHABLE(), result), + lane, r, a, b); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif - result = simde_float16x4_from_private(r_); - return result; - #endif +#else + simde_float16x4_private r_ = 
simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_lane_f16 - #define vcmla_lane_f16(r, a, b, lane) simde_vcmla_lane_f16(r, a, b, lane) +#undef vcmla_lane_f16 +#define vcmla_lane_f16(r, a, b, lane) simde_vcmla_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) +simde_float32x2_t simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmla_lane_f32(r, a, b, 0); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmla_lane_f32(r, a, b, 0); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + 
a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif - return simde_float32x2_from_private(r_); - #endif + return simde_float32x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_lane_f32 - #define vcmla_lane_f32(r, a, b, lane) simde_vcmla_lane_f32(r, a, b, lane) +#undef vcmla_lane_f32 +#define vcmla_lane_f32(r, a, b, lane) simde_vcmla_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) +simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x4_private - r_ = 
simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif - result = simde_float16x4_from_private(r_); - return result; - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), + lane, r, a, b); +#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_laneq_f16 - #define vcmla_laneq_f16(r, a, b, lane) simde_vcmla_laneq_f16(r, a, b, lane) +#undef vcmla_laneq_f16 +#define vcmla_laneq_f16(r, a, b, lane) simde_vcmla_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_laneq_f32(simde_float32x2_t r, simde_float32x2_t 
a, simde_float32x4_t b, const int lane) +simde_float32x2_t simde_vcmla_laneq_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); - +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), + lane, r, a, b); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif - result = simde_float32x2_from_private(r_); - return result; - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + 
r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif + result = simde_float32x2_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_laneq_f32 - #define vcmla_laneq_f32(r, a, b, lane) simde_vcmla_laneq_f32(r, a, b, lane) +#undef vcmla_laneq_f32 +#define vcmla_laneq_f32(r, a, b, lane) simde_vcmla_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) +simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane])); - +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_lane_f16, result, (HEDLEY_UNREACHABLE(), result), + lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - r_.values += 
b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif - result = simde_float16x8_from_private(r_); - return result; - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_lane_f16 - #define vcmlaq_lane_f16(r, a, b, lane) simde_vcmlaq_lane_f16(r, a, b, lane) +#undef vcmlaq_lane_f16 +#define vcmlaq_lane_f16(r, a, b, lane) simde_vcmlaq_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) +simde_float32x4_t simde_vcmlaq_lane_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmlaq_lane_f32(r, a, b, 0); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); - +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmlaq_lane_f32(r, a, b, 0); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif - return simde_float32x4_from_private(r_); - #endif + return simde_float32x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_lane_f32 - #define vcmlaq_lane_f32(r, a, b, lane) simde_vcmlaq_lane_f32(r, a, b, lane) +#undef vcmlaq_lane_f32 +#define vcmlaq_lane_f32(r, a, b, lane) simde_vcmlaq_lane_f32(r, a, b, lane) #endif - SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) +simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_4_(vcmlaq_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_4_(vcmlaq_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), + lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif - result = simde_float16x8_from_private(r_); - return result; - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_laneq_f16 - #define 
vcmlaq_laneq_f16(r, a, b, lane) simde_vcmlaq_laneq_f16(r, a, b, lane) +#undef vcmlaq_laneq_f16 +#define vcmlaq_laneq_f16(r, a, b, lane) simde_vcmlaq_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) +simde_float32x4_t simde_vcmlaq_laneq_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), + lane, r, a, b); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } - #endif - result = 
simde_float32x4_from_private(r_); - return result; - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } +#endif + result = simde_float32x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_laneq_f32 - #define vcmlaq_laneq_f32(r, a, b, lane) simde_vcmlaq_laneq_f32(r, a, b, lane) +#undef vcmlaq_laneq_f32 +#define vcmlaq_laneq_f32(r, a, b, lane) simde_vcmlaq_laneq_f32(r, a, b, lane) #endif - SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ +#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ \ No newline at end of file diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 4ff5405d2..1e5c198e1 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -1,328 +1,366 @@ /* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2023 Chi-Wei Chu -*/ + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Chi-Wei Chu + */ #if !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) #define SIMDE_ARM_NEON_CMLA_ROT180_LANE_H -#include "types.h" #include "dup_lane.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) +simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot180_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot180_lane_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); 
+ +#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - result = simde_float16x4_from_private(r_); - return result; - #endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot180_lane_f16 - #define vcmla_rot180_lane_f16(r, a, b, lane) simde_vcmla_rot180_lane_f16(r, a, b, lane) +#undef vcmla_rot180_lane_f16 +#define vcmla_rot180_lane_f16(r, a, b, lane) \ + simde_vcmla_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) +simde_float32x2_t simde_vcmla_rot180_lane_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x2_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmla_rot180_lane_f32(r, a, b, 0); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); - - #if 
defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmla_rot180_lane_f32(r, a, b, 0); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x2_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - return simde_float32x2_from_private(r_); - #endif + return simde_float32x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot180_lane_f32 - #define vcmla_rot180_lane_f32(r, a, b, lane) simde_vcmla_rot180_lane_f32(r, a, b, lane) +#undef vcmla_rot180_lane_f32 +#define vcmla_rot180_lane_f32(r, a, b, lane) \ + simde_vcmla_rot180_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, 
simde_float16x8_t a, simde_float16x4_t b, const int lane) +simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot180_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot180_lane_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, 
a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - result = simde_float16x8_from_private(r_); - return result; - #endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot180_lane_f16 - #define vcmlaq_rot180_lane_f16(r, a, b, lane) simde_vcmlaq_rot180_lane_f16(r, a, b, lane) +#undef vcmlaq_rot180_lane_f16 +#define vcmlaq_rot180_lane_f16(r, a, b, lane) \ + simde_vcmlaq_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) +simde_float32x4_t simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x2_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmlaq_rot180_lane_f32(r, a, b, 0); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i 
< (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmlaq_rot180_lane_f32(r, a, b, 0); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x2_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - return simde_float32x4_from_private(r_); - #endif + return simde_float32x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot180_lane_f32 - #define vcmlaq_rot180_lane_f32(r, a, b, lane) simde_vcmlaq_rot180_lane_f32(r, a, b, lane) +#undef vcmlaq_rot180_lane_f32 +#define vcmlaq_rot180_lane_f32(r, a, b, lane) \ + simde_vcmlaq_rot180_lane_f32(r, a, b, lane) #endif - SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) +simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x8_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * 
a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - result = simde_float16x4_from_private(r_); - return result; - #endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot180_laneq_f16 - #define vcmla_rot180_laneq_f16(r, a, b, lane) simde_vcmla_rot180_laneq_f16(r, a, b, lane) +#undef vcmla_rot180_laneq_f16 +#define vcmla_rot180_laneq_f16(r, a, b, lane) \ + simde_vcmla_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) +simde_float32x2_t simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x4_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - result = simde_float32x2_from_private(r_); - return result; - #endif + result = simde_float32x2_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot180_laneq_f32 - #define vcmla_rot180_laneq_f32(r, a, b, lane) simde_vcmla_rot180_laneq_f32(r, a, b, lane) +#undef vcmla_rot180_laneq_f32 +#define vcmla_rot180_laneq_f32(r, a, b, lane) \ + simde_vcmla_rot180_laneq_f32(r, a, b, lane) #endif - - SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { +simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x8_t b, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_4_(vcmlaq_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_4_(vcmlaq_rot180_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += 
-(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - result = simde_float16x8_from_private(r_); - return result; - #endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot180_laneq_f16 - #define vcmlaq_rot180_laneq_f16(r, a, b, lane) simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) +#undef vcmlaq_rot180_laneq_f16 +#define vcmlaq_rot180_laneq_f16(r, a, b, lane) \ + simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) +simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); - - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } - #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) 
&& \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot180_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x4_to_private(b).values[lane])); + +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } +#endif - result = simde_float32x4_from_private(r_); - return result; - #endif + result = simde_float32x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot180_laneq_f32 - #define vcmlaq_rot180_laneq_f32(r, a, b, lane) simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) +#undef vcmlaq_rot180_laneq_f32 +#define vcmlaq_rot180_laneq_f32(r, a, b, lane) \ + simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) #endif - SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ \ No newline at end of file diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index efa0fcdb4..ad82c5704 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -1,325 +1,365 @@ /* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated 
documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2023 Chi-Wei Chu -*/ + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Chi-Wei Chu + */ #if !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) #define SIMDE_ARM_NEON_CMLA_ROT270_LANE_H -#include "types.h" #include "dup_lane.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ - SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) +simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot270_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot270_lane_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && 
!defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x4_from_private(r_); - return result; - #endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_lane_f16 - #define vcmla_rot270_lane_f16(r, a, b, lane) simde_vcmla_rot270_lane_f16(r, a, b, lane) +#undef vcmla_rot270_lane_f16 +#define vcmla_rot270_lane_f16(r, a, b, lane) \ + simde_vcmla_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) +simde_float32x2_t simde_vcmla_rot270_lane_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x2_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || 
SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmla_rot270_lane_f32(r, a, b, 0); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmla_rot270_lane_f32(r, a, b, 0); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - return simde_float32x2_from_private(r_); - #endif + return simde_float32x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_lane_f32 - #define 
vcmla_rot270_lane_f32(r, a, b, lane) simde_vcmla_rot270_lane_f32(r, a, b, lane) +#undef vcmla_rot270_lane_f32 +#define vcmla_rot270_lane_f32(r, a, b, lane) \ + simde_vcmla_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) +simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot270_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot270_lane_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * 
sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x8_from_private(r_); - return result; - #endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_lane_f16 - #define vcmlaq_rot270_lane_f16(r, a, b, lane) simde_vcmlaq_rot270_lane_f16(r, a, b, lane) +#undef vcmlaq_rot270_lane_f16 +#define vcmlaq_rot270_lane_f16(r, a, b, lane) \ + simde_vcmlaq_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) +simde_float32x4_t simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x2_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmlaq_rot270_lane_f32(r, a, b, 0); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); +#if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmlaq_rot270_lane_f32(r, a, b, 0); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - return simde_float32x4_from_private(r_); - #endif + return simde_float32x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_lane_f32 - #define vcmlaq_rot270_lane_f32(r, a, b, lane) simde_vcmlaq_rot270_lane_f32(r, a, b, lane) +#undef vcmlaq_rot270_lane_f32 +#define vcmlaq_rot270_lane_f32(r, a, b, lane) \ + simde_vcmlaq_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t 
-simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) +simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x8_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x4_from_private(r_); - return result; - #endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_laneq_f16 - #define vcmla_rot270_laneq_f16(r, a, b, lane) simde_vcmla_rot270_laneq_f16(r, a, b, lane) +#undef vcmla_rot270_laneq_f16 +#define vcmla_rot270_laneq_f16(r, a, b, lane) \ + simde_vcmla_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) +simde_float32x2_t simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + 
SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - result = simde_float32x2_from_private(r_); - return result; - #endif + result = simde_float32x2_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot270_laneq_f32 - #define vcmla_rot270_laneq_f32(r, a, b, lane) simde_vcmla_rot270_laneq_f32(r, a, b, lane) +#undef vcmla_rot270_laneq_f32 +#define vcmla_rot270_laneq_f32(r, a, b, lane) \ + simde_vcmla_rot270_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { 
+simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x8_t b, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_4_(vcmlaq_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_4_(vcmlaq_rot270_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x8_from_private(r_); - return result; - #endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_laneq_f16 - #define vcmlaq_rot270_laneq_f16(r, a, b, lane) simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) +#undef vcmlaq_rot270_laneq_f16 +#define vcmlaq_rot270_laneq_f16(r, a, b, lane) \ + simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) +simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot270_laneq_f32, result, 
+ (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } +#endif - result = simde_float32x4_from_private(r_); - return result; - #endif + result = simde_float32x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot270_laneq_f32 - #define vcmlaq_rot270_laneq_f32(r, a, b, lane) simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) +#undef vcmlaq_rot270_laneq_f32 +#define vcmlaq_rot270_laneq_f32(r, a, b, lane) \ + simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) #endif - SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ \ No newline at end of file diff --git 
a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 6ad289e3a..d5f766a74 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -1,321 +1,363 @@ /* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, copy, -* modify, merge, publish, distribute, sublicense, and/or sell copies -* of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -* -* Copyright: -* 2023 Chi-Wei Chu -*/ + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Chi-Wei Chu + */ #if !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) #define SIMDE_ARM_NEON_CMLA_ROT90_LANE_H -#include "types.h" #include "dup_lane.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) +simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot90_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot90_lane_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x4_private 
r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - result = simde_float16x4_from_private(r_); - return result; - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_lane_f16 - #define vcmla_rot90_lane_f16(r, a, b, lane) simde_vcmla_rot90_lane_f16(r, a, b, lane) +#undef vcmla_rot90_lane_f16 +#define vcmla_rot90_lane_f16(r, a, b, lane) \ + simde_vcmla_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) +simde_float32x2_t simde_vcmla_rot90_lane_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x2_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmla_rot90_lane_f32(r, a, b, 0); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmla_rot90_lane_f32(r, a, b, 0); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif - return 
simde_float32x2_from_private(r_); - #endif + return simde_float32x2_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_lane_f32 - #define vcmla_rot90_lane_f32(r, a, b, lane) simde_vcmla_rot90_lane_f32(r, a, b, lane) +#undef vcmla_rot90_lane_f32 +#define vcmla_rot90_lane_f32(r, a, b, lane) \ + simde_vcmla_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) +simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot90_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot90_lane_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x8_from_private(r_); - return result; - #endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_lane_f16 - #define vcmlaq_rot90_lane_f16(r, a, b, lane) simde_vcmlaq_rot90_lane_f16(r, a, b, lane) +#undef vcmlaq_rot90_lane_f16 +#define vcmlaq_rot90_lane_f16(r, a, b, lane) \ + simde_vcmlaq_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) +simde_float32x4_t simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x2_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - return vcmlaq_rot90_lane_f32(r, a, b, 0); - #else - 
simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcmlaq_rot90_lane_f32(r, a, b, 0); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x2_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif - return simde_float32x4_from_private(r_); - #endif + return simde_float32x4_from_private(r_); +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_lane_f32 - #define vcmlaq_rot90_lane_f32(r, a, b, lane) simde_vcmlaq_rot90_lane_f32(r, a, 
b, lane) +#undef vcmlaq_rot90_lane_f32 +#define vcmlaq_rot90_lane_f32(r, a, b, lane) \ + simde_vcmlaq_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) +simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, + simde_float16x4_t a, + simde_float16x8_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - 
r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x4_from_private(r_); - return result; - #endif + result = simde_float16x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_laneq_f16 - #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane) +#undef vcmla_rot90_laneq_f16 +#define vcmla_rot90_laneq_f16(r, a, b, lane) \ + simde_vcmla_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) +simde_float32x2_t simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, + simde_float32x2_t a, + simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x2_private - r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 
3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif - result = simde_float32x2_from_private(r_); - return result; - #endif + result = simde_float32x2_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_laneq_f32 - #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane) +#undef vcmla_rot90_laneq_f32 +#define vcmla_rot90_laneq_f32(r, a, b, lane) \ + simde_vcmla_rot90_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t 
-simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { +simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, + simde_float16x8_t a, + simde_float16x8_t b, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float16x8_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_4_(vcmlaq_rot90_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float16x8_private - r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16(simde_float16x8_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_4_(vcmlaq_rot90_laneq_f16, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif +#if 
defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif - result = simde_float16x8_from_private(r_); - return result; - #endif + result = simde_float16x8_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_laneq_f16 - #define vcmlaq_rot90_laneq_f16(r, a, b, lane) simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) +#undef vcmlaq_rot90_laneq_f16 +#define vcmlaq_rot90_laneq_f16(r, a, b, lane) \ + simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) +simde_float32x4_t simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, + simde_float32x4_t a, + simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_t result; - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot90_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - #else - simde_float32x4_private - r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + SIMDE_CONSTIFY_2_(vcmlaq_rot90_laneq_f32, result, + (HEDLEY_UNREACHABLE(), result), lane, r, a, b); +#else + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif - result = simde_float32x4_from_private(r_); - return result; - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif + result = simde_float32x4_from_private(r_); + return result; +#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_laneq_f32 - #define vcmlaq_rot90_laneq_f32(r, a, b, lane) simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) +#undef vcmlaq_rot90_laneq_f32 +#define vcmlaq_rot90_laneq_f32(r, a, b, lane) \ + simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* 
!defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */ +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */ \ No newline at end of file diff --git a/test/arm/neon/cadd_rot270.c b/test/arm/neon/cadd_rot270.c index 62159c07e..f73d0cd45 100644 --- a/test/arm/neon/cadd_rot270.c +++ b/test/arm/neon/cadd_rot270.c @@ -1,55 +1,77 @@ #define SIMDE_TEST_ARM_NEON_INSN cadd_rot270 -#include "test-neon.h" #include "../../../simde/arm/neon/cadd_rot270.h" -static int -test_simde_vcadd_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { +#include "test-neon.h" + +static int test_simde_vcadd_rot270_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, - { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, - { SIMDE_FLOAT16_VALUE( 1086.00), SIMDE_FLOAT16_VALUE( 962.00), SIMDE_FLOAT16_VALUE( -922.00), SIMDE_FLOAT16_VALUE( 429.00) } }, - { { SIMDE_FLOAT16_VALUE( -659.50), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00) }, - { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - { SIMDE_FLOAT16_VALUE( -556.50), SIMDE_FLOAT16_VALUE( 194.50), SIMDE_FLOAT16_VALUE( 1382.00), SIMDE_FLOAT16_VALUE( -375.75) } }, - { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - { SIMDE_FLOAT16_VALUE( 454.50), SIMDE_FLOAT16_VALUE( -107.75), SIMDE_FLOAT16_VALUE( -67.25), SIMDE_FLOAT16_VALUE( 607.00) } }, - { { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( -582.50), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25) }, - 
{ SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, - { SIMDE_FLOAT16_VALUE( -158.50), SIMDE_FLOAT16_VALUE( -1496.00), SIMDE_FLOAT16_VALUE( -545.00), SIMDE_FLOAT16_VALUE( 778.50) } }, - { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, - { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, - { SIMDE_FLOAT16_VALUE( 172.25), SIMDE_FLOAT16_VALUE( -743.00), SIMDE_FLOAT16_VALUE( -490.75), SIMDE_FLOAT16_VALUE( 971.00) } }, - { { SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50) }, - { SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, - { SIMDE_FLOAT16_VALUE( 190.50), SIMDE_FLOAT16_VALUE( 1041.00), SIMDE_FLOAT16_VALUE( -1044.00), SIMDE_FLOAT16_VALUE( 1416.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -1177.00), SIMDE_FLOAT16_VALUE( -573.00), SIMDE_FLOAT16_VALUE( 1188.00), SIMDE_FLOAT16_VALUE( -813.00) } }, - { { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, - { SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, - { SIMDE_FLOAT16_VALUE( 22.50), SIMDE_FLOAT16_VALUE( 1161.00), SIMDE_FLOAT16_VALUE( 577.50), SIMDE_FLOAT16_VALUE( 249.25) } } - }; + {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), + SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, + {SIMDE_FLOAT16_VALUE(-188.38), 
SIMDE_FLOAT16_VALUE(269.75), + SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, + {SIMDE_FLOAT16_VALUE(1086.00), SIMDE_FLOAT16_VALUE(962.00), + SIMDE_FLOAT16_VALUE(-922.00), SIMDE_FLOAT16_VALUE(429.00)}}, + {{SIMDE_FLOAT16_VALUE(-659.50), SIMDE_FLOAT16_VALUE(924.50), + SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00)}, + {SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + {SIMDE_FLOAT16_VALUE(-556.50), SIMDE_FLOAT16_VALUE(194.50), + SIMDE_FLOAT16_VALUE(1382.00), SIMDE_FLOAT16_VALUE(-375.75)}}, + {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + {SIMDE_FLOAT16_VALUE(454.50), SIMDE_FLOAT16_VALUE(-107.75), + SIMDE_FLOAT16_VALUE(-67.25), SIMDE_FLOAT16_VALUE(607.00)}}, + {{SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(-582.50), + SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25)}, + {SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), + SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, + {SIMDE_FLOAT16_VALUE(-158.50), SIMDE_FLOAT16_VALUE(-1496.00), + SIMDE_FLOAT16_VALUE(-545.00), SIMDE_FLOAT16_VALUE(778.50)}}, + {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), + SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, + {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), + SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, + {SIMDE_FLOAT16_VALUE(172.25), SIMDE_FLOAT16_VALUE(-743.00), + SIMDE_FLOAT16_VALUE(-490.75), SIMDE_FLOAT16_VALUE(971.00)}}, + {{SIMDE_FLOAT16_VALUE(498.50), SIMDE_FLOAT16_VALUE(205.75), + SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50)}, + {SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), + SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, + {SIMDE_FLOAT16_VALUE(190.50), 
SIMDE_FLOAT16_VALUE(1041.00), + SIMDE_FLOAT16_VALUE(-1044.00), SIMDE_FLOAT16_VALUE(1416.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-1177.00), SIMDE_FLOAT16_VALUE(-573.00), + SIMDE_FLOAT16_VALUE(1188.00), SIMDE_FLOAT16_VALUE(-813.00)}}, + {{SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, + {SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, + {SIMDE_FLOAT16_VALUE(22.50), SIMDE_FLOAT16_VALUE(1161.00), + SIMDE_FLOAT16_VALUE(577.50), SIMDE_FLOAT16_VALUE(249.25)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r = simde_vcadd_rot270_f16(a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcadd_rot270_f16(a, b); @@ -62,76 +84,122 @@ test_simde_vcadd_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcaddq_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcaddq_rot270_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75), - 
SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, - { SIMDE_FLOAT16_VALUE( -936.50), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), - SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, - { SIMDE_FLOAT16_VALUE(75.000000), SIMDE_FLOAT16_VALUE( 9.000000), SIMDE_FLOAT16_VALUE(-330.000000), SIMDE_FLOAT16_VALUE(444.750000), - SIMDE_FLOAT16_VALUE(-379.500000), SIMDE_FLOAT16_VALUE(537.000000), SIMDE_FLOAT16_VALUE(-2.000000), SIMDE_FLOAT16_VALUE(98.500000) } }, - { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00), - SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, - { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( -666.00), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), - SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, - { SIMDE_FLOAT16_VALUE(-414.750000), SIMDE_FLOAT16_VALUE(-60.000000), SIMDE_FLOAT16_VALUE(320.250000), SIMDE_FLOAT16_VALUE(-1288.000000), - SIMDE_FLOAT16_VALUE(1193.000000), SIMDE_FLOAT16_VALUE(268.750000), SIMDE_FLOAT16_VALUE(991.000000), SIMDE_FLOAT16_VALUE(-564.500000) } }, - { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50), - SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, - { SIMDE_FLOAT16_VALUE( -111.25), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), - SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, - { SIMDE_FLOAT16_VALUE(-747.500000), 
SIMDE_FLOAT16_VALUE(529.000000), SIMDE_FLOAT16_VALUE(95.000000), SIMDE_FLOAT16_VALUE(771.000000), - SIMDE_FLOAT16_VALUE(-1456.000000), SIMDE_FLOAT16_VALUE(309.500000), SIMDE_FLOAT16_VALUE(-1582.000000), SIMDE_FLOAT16_VALUE(238.750000) } }, - { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50), - SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, - { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -677.50), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), - SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, - { SIMDE_FLOAT16_VALUE(-29.000000), SIMDE_FLOAT16_VALUE(53.750000), SIMDE_FLOAT16_VALUE(427.250000), SIMDE_FLOAT16_VALUE(-891.000000), - SIMDE_FLOAT16_VALUE(-270.750000), SIMDE_FLOAT16_VALUE( 5.875000), SIMDE_FLOAT16_VALUE(1056.000000), SIMDE_FLOAT16_VALUE(430.250000) } }, - { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), - SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, - { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), - SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, - { SIMDE_FLOAT16_VALUE(-438.500000), SIMDE_FLOAT16_VALUE(-711.500000), SIMDE_FLOAT16_VALUE(372.500000), SIMDE_FLOAT16_VALUE(1038.000000), - SIMDE_FLOAT16_VALUE(-1028.000000), SIMDE_FLOAT16_VALUE(-369.250000), SIMDE_FLOAT16_VALUE(-848.000000), SIMDE_FLOAT16_VALUE(900.000000) } }, - { { SIMDE_FLOAT16_VALUE( -378.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), - SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), 
SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, - { SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25), - SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88) }, - { SIMDE_FLOAT16_VALUE(-1352.000000), SIMDE_FLOAT16_VALUE(91.000000), SIMDE_FLOAT16_VALUE(490.750000), SIMDE_FLOAT16_VALUE(492.000000), - SIMDE_FLOAT16_VALUE(-1510.000000), SIMDE_FLOAT16_VALUE(-758.000000), SIMDE_FLOAT16_VALUE(533.000000), SIMDE_FLOAT16_VALUE(1131.000000) } }, - { { SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50), - SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 395.50), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, - { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), - SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, - { SIMDE_FLOAT16_VALUE(192.625000), SIMDE_FLOAT16_VALUE(1297.000000), SIMDE_FLOAT16_VALUE(-367.500000), SIMDE_FLOAT16_VALUE(1320.000000), - SIMDE_FLOAT16_VALUE(-315.000000), SIMDE_FLOAT16_VALUE(-435.500000), SIMDE_FLOAT16_VALUE(791.000000), SIMDE_FLOAT16_VALUE(253.250000) } }, - { { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), - SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, - { SIMDE_FLOAT16_VALUE( 274.50), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25), - SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50) }, - { SIMDE_FLOAT16_VALUE(728.000000), SIMDE_FLOAT16_VALUE(347.000000), SIMDE_FLOAT16_VALUE(1081.000000), 
SIMDE_FLOAT16_VALUE(-248.500000), - SIMDE_FLOAT16_VALUE(-645.500000), SIMDE_FLOAT16_VALUE(298.750000), SIMDE_FLOAT16_VALUE(75.500000), SIMDE_FLOAT16_VALUE(845.000000) } } - }; + {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), + SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75), + SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, + {SIMDE_FLOAT16_VALUE(-936.50), SIMDE_FLOAT16_VALUE(-465.00), + SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), + SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), + SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, + {SIMDE_FLOAT16_VALUE(75.000000), SIMDE_FLOAT16_VALUE(9.000000), + SIMDE_FLOAT16_VALUE(-330.000000), SIMDE_FLOAT16_VALUE(444.750000), + SIMDE_FLOAT16_VALUE(-379.500000), SIMDE_FLOAT16_VALUE(537.000000), + SIMDE_FLOAT16_VALUE(-2.000000), SIMDE_FLOAT16_VALUE(98.500000)}}, + {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), + SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00), + SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), + SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, + {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(-666.00), + SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), + SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), + SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, + {SIMDE_FLOAT16_VALUE(-414.750000), SIMDE_FLOAT16_VALUE(-60.000000), + SIMDE_FLOAT16_VALUE(320.250000), SIMDE_FLOAT16_VALUE(-1288.000000), + SIMDE_FLOAT16_VALUE(1193.000000), SIMDE_FLOAT16_VALUE(268.750000), + SIMDE_FLOAT16_VALUE(991.000000), SIMDE_FLOAT16_VALUE(-564.500000)}}, + {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), + SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50), + SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), + SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, + {SIMDE_FLOAT16_VALUE(-111.25), 
SIMDE_FLOAT16_VALUE(-830.50), + SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), + SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), + SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, + {SIMDE_FLOAT16_VALUE(-747.500000), SIMDE_FLOAT16_VALUE(529.000000), + SIMDE_FLOAT16_VALUE(95.000000), SIMDE_FLOAT16_VALUE(771.000000), + SIMDE_FLOAT16_VALUE(-1456.000000), SIMDE_FLOAT16_VALUE(309.500000), + SIMDE_FLOAT16_VALUE(-1582.000000), SIMDE_FLOAT16_VALUE(238.750000)}}, + {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), + SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50), + SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), + SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, + {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-677.50), + SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), + SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), + SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, + {SIMDE_FLOAT16_VALUE(-29.000000), SIMDE_FLOAT16_VALUE(53.750000), + SIMDE_FLOAT16_VALUE(427.250000), SIMDE_FLOAT16_VALUE(-891.000000), + SIMDE_FLOAT16_VALUE(-270.750000), SIMDE_FLOAT16_VALUE(5.875000), + SIMDE_FLOAT16_VALUE(1056.000000), SIMDE_FLOAT16_VALUE(430.250000)}}, + {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), + SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), + SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, + {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), + SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), + SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), + SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, + {SIMDE_FLOAT16_VALUE(-438.500000), SIMDE_FLOAT16_VALUE(-711.500000), + SIMDE_FLOAT16_VALUE(372.500000), SIMDE_FLOAT16_VALUE(1038.000000), + SIMDE_FLOAT16_VALUE(-1028.000000), SIMDE_FLOAT16_VALUE(-369.250000), + SIMDE_FLOAT16_VALUE(-848.000000), 
SIMDE_FLOAT16_VALUE(900.000000)}}, + {{SIMDE_FLOAT16_VALUE(-378.00), SIMDE_FLOAT16_VALUE(-695.50), + SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), + SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, + {SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25), + SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-640.00), + SIMDE_FLOAT16_VALUE(-552.00), SIMDE_FLOAT16_VALUE(75.88)}, + {SIMDE_FLOAT16_VALUE(-1352.000000), SIMDE_FLOAT16_VALUE(91.000000), + SIMDE_FLOAT16_VALUE(490.750000), SIMDE_FLOAT16_VALUE(492.000000), + SIMDE_FLOAT16_VALUE(-1510.000000), SIMDE_FLOAT16_VALUE(-758.000000), + SIMDE_FLOAT16_VALUE(533.000000), SIMDE_FLOAT16_VALUE(1131.000000)}}, + {{SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), + SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(943.50), + SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(395.50), + SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, + {SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), + SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), + SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), + SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, + {SIMDE_FLOAT16_VALUE(192.625000), SIMDE_FLOAT16_VALUE(1297.000000), + SIMDE_FLOAT16_VALUE(-367.500000), SIMDE_FLOAT16_VALUE(1320.000000), + SIMDE_FLOAT16_VALUE(-315.000000), SIMDE_FLOAT16_VALUE(-435.500000), + SIMDE_FLOAT16_VALUE(791.000000), SIMDE_FLOAT16_VALUE(253.250000)}}, + {{SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), + SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), + SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), + SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, + {SIMDE_FLOAT16_VALUE(274.50), SIMDE_FLOAT16_VALUE(192.38), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25), + SIMDE_FLOAT16_VALUE(-618.50), 
SIMDE_FLOAT16_VALUE(-70.81), + SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50)}, + {SIMDE_FLOAT16_VALUE(728.000000), SIMDE_FLOAT16_VALUE(347.000000), + SIMDE_FLOAT16_VALUE(1081.000000), SIMDE_FLOAT16_VALUE(-248.500000), + SIMDE_FLOAT16_VALUE(-645.500000), SIMDE_FLOAT16_VALUE(298.750000), + SIMDE_FLOAT16_VALUE(75.500000), SIMDE_FLOAT16_VALUE(845.000000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); @@ -144,53 +212,50 @@ test_simde_vcaddq_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcadd_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcadd_rot270_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 863.66), SIMDE_FLOAT32_C( 828.31) }, - { SIMDE_FLOAT32_C( -563.51), SIMDE_FLOAT32_C( -576.51) }, - { SIMDE_FLOAT32_C(287.149963), SIMDE_FLOAT32_C(1391.820068) } }, - { { SIMDE_FLOAT32_C( -703.45), SIMDE_FLOAT32_C( 383.90) }, - { SIMDE_FLOAT32_C( -772.46), SIMDE_FLOAT32_C( 457.40) }, - { SIMDE_FLOAT32_C(-246.050018), SIMDE_FLOAT32_C(1156.359985) } }, - { { SIMDE_FLOAT32_C( 295.99), SIMDE_FLOAT32_C( 653.10) }, - { SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( 945.50) }, - { 
SIMDE_FLOAT32_C(1241.489990), SIMDE_FLOAT32_C(774.079956) } }, - { { SIMDE_FLOAT32_C( -280.81), SIMDE_FLOAT32_C( 631.32) }, - { SIMDE_FLOAT32_C( 688.34), SIMDE_FLOAT32_C( 191.95) }, - { SIMDE_FLOAT32_C(-88.860001), SIMDE_FLOAT32_C(-57.020020) } }, - { { SIMDE_FLOAT32_C( -522.88), SIMDE_FLOAT32_C( -323.79) }, - { SIMDE_FLOAT32_C( -887.99), SIMDE_FLOAT32_C( -283.70) }, - { SIMDE_FLOAT32_C(-806.580017), SIMDE_FLOAT32_C(564.199951) } }, - { { SIMDE_FLOAT32_C( -117.76), SIMDE_FLOAT32_C( -841.45) }, - { SIMDE_FLOAT32_C( 664.94), SIMDE_FLOAT32_C( -987.19) }, - { SIMDE_FLOAT32_C(-1104.949951), SIMDE_FLOAT32_C(-1506.390015) } }, - { { SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( -152.10) }, - { SIMDE_FLOAT32_C( 963.83), SIMDE_FLOAT32_C( 919.89) }, - { SIMDE_FLOAT32_C(277.000000), SIMDE_FLOAT32_C(-1115.930054) } }, - { { SIMDE_FLOAT32_C( 630.40), SIMDE_FLOAT32_C( -669.33) }, - { SIMDE_FLOAT32_C( 671.13), SIMDE_FLOAT32_C( 256.93) }, - { SIMDE_FLOAT32_C(887.330017), SIMDE_FLOAT32_C(-1340.459961) } } - }; + {{SIMDE_FLOAT32_C(863.66), SIMDE_FLOAT32_C(828.31)}, + {SIMDE_FLOAT32_C(-563.51), SIMDE_FLOAT32_C(-576.51)}, + {SIMDE_FLOAT32_C(287.149963), SIMDE_FLOAT32_C(1391.820068)}}, + {{SIMDE_FLOAT32_C(-703.45), SIMDE_FLOAT32_C(383.90)}, + {SIMDE_FLOAT32_C(-772.46), SIMDE_FLOAT32_C(457.40)}, + {SIMDE_FLOAT32_C(-246.050018), SIMDE_FLOAT32_C(1156.359985)}}, + {{SIMDE_FLOAT32_C(295.99), SIMDE_FLOAT32_C(653.10)}, + {SIMDE_FLOAT32_C(-120.98), SIMDE_FLOAT32_C(945.50)}, + {SIMDE_FLOAT32_C(1241.489990), SIMDE_FLOAT32_C(774.079956)}}, + {{SIMDE_FLOAT32_C(-280.81), SIMDE_FLOAT32_C(631.32)}, + {SIMDE_FLOAT32_C(688.34), SIMDE_FLOAT32_C(191.95)}, + {SIMDE_FLOAT32_C(-88.860001), SIMDE_FLOAT32_C(-57.020020)}}, + {{SIMDE_FLOAT32_C(-522.88), SIMDE_FLOAT32_C(-323.79)}, + {SIMDE_FLOAT32_C(-887.99), SIMDE_FLOAT32_C(-283.70)}, + {SIMDE_FLOAT32_C(-806.580017), SIMDE_FLOAT32_C(564.199951)}}, + {{SIMDE_FLOAT32_C(-117.76), SIMDE_FLOAT32_C(-841.45)}, + {SIMDE_FLOAT32_C(664.94), SIMDE_FLOAT32_C(-987.19)}, + 
{SIMDE_FLOAT32_C(-1104.949951), SIMDE_FLOAT32_C(-1506.390015)}}, + {{SIMDE_FLOAT32_C(-642.89), SIMDE_FLOAT32_C(-152.10)}, + {SIMDE_FLOAT32_C(963.83), SIMDE_FLOAT32_C(919.89)}, + {SIMDE_FLOAT32_C(277.000000), SIMDE_FLOAT32_C(-1115.930054)}}, + {{SIMDE_FLOAT32_C(630.40), SIMDE_FLOAT32_C(-669.33)}, + {SIMDE_FLOAT32_C(671.13), SIMDE_FLOAT32_C(256.93)}, + {SIMDE_FLOAT32_C(887.330017), SIMDE_FLOAT32_C(-1340.459961)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcadd_rot270_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - - + return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcadd_rot270_f32(a, b); @@ -203,51 +268,74 @@ test_simde_vcadd_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcaddq_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcaddq_rot270_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -337.31), SIMDE_FLOAT32_C( -857.36), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( -617.33) }, - { SIMDE_FLOAT32_C( -439.38), SIMDE_FLOAT32_C( 245.13), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( 520.69) }, - { SIMDE_FLOAT32_C(-92.179993), SIMDE_FLOAT32_C(-417.979980), SIMDE_FLOAT32_C(855.400024), SIMDE_FLOAT32_C(-728.390015) } }, - { { SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 250.19), SIMDE_FLOAT32_C( -679.96), SIMDE_FLOAT32_C( -750.25) }, - { SIMDE_FLOAT32_C( -138.26), SIMDE_FLOAT32_C( -14.62), SIMDE_FLOAT32_C( -921.52), 
SIMDE_FLOAT32_C( 225.91) }, - { SIMDE_FLOAT32_C(70.869995), SIMDE_FLOAT32_C(388.450012), SIMDE_FLOAT32_C(-454.050018), SIMDE_FLOAT32_C(171.270020) } }, - { { SIMDE_FLOAT32_C( 242.83), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 297.95), SIMDE_FLOAT32_C( 105.66) }, - { SIMDE_FLOAT32_C( -722.51), SIMDE_FLOAT32_C( -802.37), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( 915.39) }, - { SIMDE_FLOAT32_C(-559.539978), SIMDE_FLOAT32_C(1591.790039), SIMDE_FLOAT32_C(1213.340088), SIMDE_FLOAT32_C(351.440002) } }, - { { SIMDE_FLOAT32_C( 54.20), SIMDE_FLOAT32_C( -928.06), SIMDE_FLOAT32_C( 362.39), SIMDE_FLOAT32_C( -936.63) }, - { SIMDE_FLOAT32_C( 185.82), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 924.66), SIMDE_FLOAT32_C( -643.82) }, - { SIMDE_FLOAT32_C(-190.229996), SIMDE_FLOAT32_C(-1113.880005), SIMDE_FLOAT32_C(-281.429993), SIMDE_FLOAT32_C(-1861.290039) } }, - { { SIMDE_FLOAT32_C( -516.92), SIMDE_FLOAT32_C( -615.16), SIMDE_FLOAT32_C( -751.52), SIMDE_FLOAT32_C( -974.04) }, - { SIMDE_FLOAT32_C( -144.42), SIMDE_FLOAT32_C( 338.27), SIMDE_FLOAT32_C( 704.92), SIMDE_FLOAT32_C( 116.90) }, - { SIMDE_FLOAT32_C(-178.649994), SIMDE_FLOAT32_C(-470.739990), SIMDE_FLOAT32_C(-634.619995), SIMDE_FLOAT32_C(-1678.959961) } }, - { { SIMDE_FLOAT32_C( 49.39), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -476.30), SIMDE_FLOAT32_C( 106.71) }, - { SIMDE_FLOAT32_C( -725.84), SIMDE_FLOAT32_C( -353.71), SIMDE_FLOAT32_C( 268.41), SIMDE_FLOAT32_C( 728.83) }, - { SIMDE_FLOAT32_C(-304.320007), SIMDE_FLOAT32_C(362.840027), SIMDE_FLOAT32_C(252.530029), SIMDE_FLOAT32_C(-161.700012) } }, - { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87), SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, - { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, - { SIMDE_FLOAT32_C(150.020020), SIMDE_FLOAT32_C(697.539978), SIMDE_FLOAT32_C(-255.500000), SIMDE_FLOAT32_C(-386.080017) } }, - { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( 
-295.86), SIMDE_FLOAT32_C( -275.42) }, - { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, - { SIMDE_FLOAT32_C(-309.889984), SIMDE_FLOAT32_C(146.700012), SIMDE_FLOAT32_C(303.960022), SIMDE_FLOAT32_C(-1102.250000) } } - }; + {{SIMDE_FLOAT32_C(-337.31), SIMDE_FLOAT32_C(-857.36), + SIMDE_FLOAT32_C(334.71), SIMDE_FLOAT32_C(-617.33)}, + {SIMDE_FLOAT32_C(-439.38), SIMDE_FLOAT32_C(245.13), + SIMDE_FLOAT32_C(111.06), SIMDE_FLOAT32_C(520.69)}, + {SIMDE_FLOAT32_C(-92.179993), SIMDE_FLOAT32_C(-417.979980), + SIMDE_FLOAT32_C(855.400024), SIMDE_FLOAT32_C(-728.390015)}}, + {{SIMDE_FLOAT32_C(85.49), SIMDE_FLOAT32_C(250.19), + SIMDE_FLOAT32_C(-679.96), SIMDE_FLOAT32_C(-750.25)}, + {SIMDE_FLOAT32_C(-138.26), SIMDE_FLOAT32_C(-14.62), + SIMDE_FLOAT32_C(-921.52), SIMDE_FLOAT32_C(225.91)}, + {SIMDE_FLOAT32_C(70.869995), SIMDE_FLOAT32_C(388.450012), + SIMDE_FLOAT32_C(-454.050018), SIMDE_FLOAT32_C(171.270020)}}, + {{SIMDE_FLOAT32_C(242.83), SIMDE_FLOAT32_C(869.28), + SIMDE_FLOAT32_C(297.95), SIMDE_FLOAT32_C(105.66)}, + {SIMDE_FLOAT32_C(-722.51), SIMDE_FLOAT32_C(-802.37), + SIMDE_FLOAT32_C(-245.78), SIMDE_FLOAT32_C(915.39)}, + {SIMDE_FLOAT32_C(-559.539978), SIMDE_FLOAT32_C(1591.790039), + SIMDE_FLOAT32_C(1213.340088), SIMDE_FLOAT32_C(351.440002)}}, + {{SIMDE_FLOAT32_C(54.20), SIMDE_FLOAT32_C(-928.06), + SIMDE_FLOAT32_C(362.39), SIMDE_FLOAT32_C(-936.63)}, + {SIMDE_FLOAT32_C(185.82), SIMDE_FLOAT32_C(-244.43), + SIMDE_FLOAT32_C(924.66), SIMDE_FLOAT32_C(-643.82)}, + {SIMDE_FLOAT32_C(-190.229996), SIMDE_FLOAT32_C(-1113.880005), + SIMDE_FLOAT32_C(-281.429993), SIMDE_FLOAT32_C(-1861.290039)}}, + {{SIMDE_FLOAT32_C(-516.92), SIMDE_FLOAT32_C(-615.16), + SIMDE_FLOAT32_C(-751.52), SIMDE_FLOAT32_C(-974.04)}, + {SIMDE_FLOAT32_C(-144.42), SIMDE_FLOAT32_C(338.27), + SIMDE_FLOAT32_C(704.92), SIMDE_FLOAT32_C(116.90)}, + {SIMDE_FLOAT32_C(-178.649994), SIMDE_FLOAT32_C(-470.739990), + SIMDE_FLOAT32_C(-634.619995), SIMDE_FLOAT32_C(-1678.959961)}}, + 
{{SIMDE_FLOAT32_C(49.39), SIMDE_FLOAT32_C(-363.00), + SIMDE_FLOAT32_C(-476.30), SIMDE_FLOAT32_C(106.71)}, + {SIMDE_FLOAT32_C(-725.84), SIMDE_FLOAT32_C(-353.71), + SIMDE_FLOAT32_C(268.41), SIMDE_FLOAT32_C(728.83)}, + {SIMDE_FLOAT32_C(-304.320007), SIMDE_FLOAT32_C(362.840027), + SIMDE_FLOAT32_C(252.530029), SIMDE_FLOAT32_C(-161.700012)}}, + {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87), + SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, + {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), + SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, + {SIMDE_FLOAT32_C(150.020020), SIMDE_FLOAT32_C(697.539978), + SIMDE_FLOAT32_C(-255.500000), SIMDE_FLOAT32_C(-386.080017)}}, + {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94), + SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, + {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), + SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, + {SIMDE_FLOAT32_C(-309.889984), SIMDE_FLOAT32_C(146.700012), + SIMDE_FLOAT32_C(303.960022), SIMDE_FLOAT32_C(-1102.250000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); @@ -260,52 +348,50 @@ test_simde_vcaddq_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcaddq_rot270_f64 (SIMDE_MUNIT_TEST_ARGS) { +static int 
test_simde_vcaddq_rot270_f64(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { - { { SIMDE_FLOAT64_C( -30.36), SIMDE_FLOAT64_C( 631.53) }, - { SIMDE_FLOAT64_C( 850.75), SIMDE_FLOAT64_C( -263.55) }, - { SIMDE_FLOAT64_C(-293.910000), SIMDE_FLOAT64_C(-219.220000) } }, - { { SIMDE_FLOAT64_C( 139.96), SIMDE_FLOAT64_C( 859.14) }, - { SIMDE_FLOAT64_C( -834.47), SIMDE_FLOAT64_C( 216.10) }, - { SIMDE_FLOAT64_C(356.060000), SIMDE_FLOAT64_C(1693.610000) } }, - { { SIMDE_FLOAT64_C( 995.86), SIMDE_FLOAT64_C( 529.74) }, - { SIMDE_FLOAT64_C( 79.08), SIMDE_FLOAT64_C( 947.13) }, - { SIMDE_FLOAT64_C(1942.990000), SIMDE_FLOAT64_C(450.660000) } }, - { { SIMDE_FLOAT64_C( 122.02), SIMDE_FLOAT64_C( -250.00) }, - { SIMDE_FLOAT64_C( -361.82), SIMDE_FLOAT64_C( 265.24) }, - { SIMDE_FLOAT64_C(387.260000), SIMDE_FLOAT64_C(111.820000) } }, - { { SIMDE_FLOAT64_C( 275.71), SIMDE_FLOAT64_C( 2.71) }, - { SIMDE_FLOAT64_C( 99.79), SIMDE_FLOAT64_C( -137.67) }, - { SIMDE_FLOAT64_C(138.040000), SIMDE_FLOAT64_C(-97.080000) } }, - { { SIMDE_FLOAT64_C( -761.19), SIMDE_FLOAT64_C( 813.19) }, - { SIMDE_FLOAT64_C( -897.68), SIMDE_FLOAT64_C( 653.58) }, - { SIMDE_FLOAT64_C(-107.610000), SIMDE_FLOAT64_C(1710.870000) } }, - { { SIMDE_FLOAT64_C( 396.02), SIMDE_FLOAT64_C( 413.06) }, - { SIMDE_FLOAT64_C( 514.09), SIMDE_FLOAT64_C( -977.67) }, - { SIMDE_FLOAT64_C(-581.650000), SIMDE_FLOAT64_C(-101.030000) } }, - { { SIMDE_FLOAT64_C( -671.79), SIMDE_FLOAT64_C( -92.13) }, - { SIMDE_FLOAT64_C( -441.32), SIMDE_FLOAT64_C( -374.27) }, - { SIMDE_FLOAT64_C(-1046.060000), SIMDE_FLOAT64_C(349.190000) } } - }; + {{SIMDE_FLOAT64_C(-30.36), SIMDE_FLOAT64_C(631.53)}, + {SIMDE_FLOAT64_C(850.75), SIMDE_FLOAT64_C(-263.55)}, + {SIMDE_FLOAT64_C(-293.910000), SIMDE_FLOAT64_C(-219.220000)}}, + {{SIMDE_FLOAT64_C(139.96), SIMDE_FLOAT64_C(859.14)}, + {SIMDE_FLOAT64_C(-834.47), SIMDE_FLOAT64_C(216.10)}, + {SIMDE_FLOAT64_C(356.060000), 
SIMDE_FLOAT64_C(1693.610000)}}, + {{SIMDE_FLOAT64_C(995.86), SIMDE_FLOAT64_C(529.74)}, + {SIMDE_FLOAT64_C(79.08), SIMDE_FLOAT64_C(947.13)}, + {SIMDE_FLOAT64_C(1942.990000), SIMDE_FLOAT64_C(450.660000)}}, + {{SIMDE_FLOAT64_C(122.02), SIMDE_FLOAT64_C(-250.00)}, + {SIMDE_FLOAT64_C(-361.82), SIMDE_FLOAT64_C(265.24)}, + {SIMDE_FLOAT64_C(387.260000), SIMDE_FLOAT64_C(111.820000)}}, + {{SIMDE_FLOAT64_C(275.71), SIMDE_FLOAT64_C(2.71)}, + {SIMDE_FLOAT64_C(99.79), SIMDE_FLOAT64_C(-137.67)}, + {SIMDE_FLOAT64_C(138.040000), SIMDE_FLOAT64_C(-97.080000)}}, + {{SIMDE_FLOAT64_C(-761.19), SIMDE_FLOAT64_C(813.19)}, + {SIMDE_FLOAT64_C(-897.68), SIMDE_FLOAT64_C(653.58)}, + {SIMDE_FLOAT64_C(-107.610000), SIMDE_FLOAT64_C(1710.870000)}}, + {{SIMDE_FLOAT64_C(396.02), SIMDE_FLOAT64_C(413.06)}, + {SIMDE_FLOAT64_C(514.09), SIMDE_FLOAT64_C(-977.67)}, + {SIMDE_FLOAT64_C(-581.650000), SIMDE_FLOAT64_C(-101.030000)}}, + {{SIMDE_FLOAT64_C(-671.79), SIMDE_FLOAT64_C(-92.13)}, + {SIMDE_FLOAT64_C(-441.32), SIMDE_FLOAT64_C(-374.27)}, + {SIMDE_FLOAT64_C(-1046.060000), SIMDE_FLOAT64_C(349.190000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vcaddq_rot270_f64(a, b); - simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), + 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t r = simde_vcaddq_rot270_f64(a, b); diff --git a/test/arm/neon/cadd_rot90.c b/test/arm/neon/cadd_rot90.c index 9119a7ae7..9097bf10a 100644 --- a/test/arm/neon/cadd_rot90.c +++ 
b/test/arm/neon/cadd_rot90.c @@ -1,55 +1,77 @@ #define SIMDE_TEST_ARM_NEON_INSN cadd_rot90 -#include "test-neon.h" #include "../../../simde/arm/neon/cadd_rot90.h" -static int -test_simde_vcadd_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { +#include "test-neon.h" + +static int test_simde_vcadd_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, - { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, - { SIMDE_FLOAT16_VALUE( 547.00), SIMDE_FLOAT16_VALUE( 585.00), SIMDE_FLOAT16_VALUE( 166.25), SIMDE_FLOAT16_VALUE( 660.00) } }, - { { SIMDE_FLOAT16_VALUE( -659.50), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00) }, - { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - { SIMDE_FLOAT16_VALUE( -762.50), SIMDE_FLOAT16_VALUE( 1654.00), SIMDE_FLOAT16_VALUE( -414.50), SIMDE_FLOAT16_VALUE( -1138.00) } }, - { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - { SIMDE_FLOAT16_VALUE( 131.62), SIMDE_FLOAT16_VALUE( -120.38), SIMDE_FLOAT16_VALUE( -208.00), SIMDE_FLOAT16_VALUE( 838.00) } }, - { { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( -582.50), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25) }, - { SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, - { SIMDE_FLOAT16_VALUE( -1640.00), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( 1263.00), SIMDE_FLOAT16_VALUE( -94.00) } }, - { { 
SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, - { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, - { SIMDE_FLOAT16_VALUE( -843.00), SIMDE_FLOAT16_VALUE( 238.12), SIMDE_FLOAT16_VALUE( 945.00), SIMDE_FLOAT16_VALUE( -988.00) } }, - { { SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50) }, - { SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, - { SIMDE_FLOAT16_VALUE( 806.50), SIMDE_FLOAT16_VALUE( -629.00), SIMDE_FLOAT16_VALUE( -550.50), SIMDE_FLOAT16_VALUE( 67.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( 755.00), SIMDE_FLOAT16_VALUE( -288.75), SIMDE_FLOAT16_VALUE( 263.00), SIMDE_FLOAT16_VALUE( -1076.00) } }, - { { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, - { SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, - { SIMDE_FLOAT16_VALUE( -744.50), SIMDE_FLOAT16_VALUE( -790.00), SIMDE_FLOAT16_VALUE( -456.50), SIMDE_FLOAT16_VALUE( 1250.00) } } - }; + {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), + SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, + {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), + SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, + {SIMDE_FLOAT16_VALUE(547.00), SIMDE_FLOAT16_VALUE(585.00), + SIMDE_FLOAT16_VALUE(166.25), SIMDE_FLOAT16_VALUE(660.00)}}, + {{SIMDE_FLOAT16_VALUE(-659.50), SIMDE_FLOAT16_VALUE(924.50), + 
SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00)}, + {SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + {SIMDE_FLOAT16_VALUE(-762.50), SIMDE_FLOAT16_VALUE(1654.00), + SIMDE_FLOAT16_VALUE(-414.50), SIMDE_FLOAT16_VALUE(-1138.00)}}, + {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + {SIMDE_FLOAT16_VALUE(131.62), SIMDE_FLOAT16_VALUE(-120.38), + SIMDE_FLOAT16_VALUE(-208.00), SIMDE_FLOAT16_VALUE(838.00)}}, + {{SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(-582.50), + SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25)}, + {SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), + SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, + {SIMDE_FLOAT16_VALUE(-1640.00), SIMDE_FLOAT16_VALUE(330.50), + SIMDE_FLOAT16_VALUE(1263.00), SIMDE_FLOAT16_VALUE(-94.00)}}, + {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), + SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, + {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), + SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, + {SIMDE_FLOAT16_VALUE(-843.00), SIMDE_FLOAT16_VALUE(238.12), + SIMDE_FLOAT16_VALUE(945.00), SIMDE_FLOAT16_VALUE(-988.00)}}, + {{SIMDE_FLOAT16_VALUE(498.50), SIMDE_FLOAT16_VALUE(205.75), + SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50)}, + {SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), + SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, + {SIMDE_FLOAT16_VALUE(806.50), SIMDE_FLOAT16_VALUE(-629.00), + SIMDE_FLOAT16_VALUE(-550.50), SIMDE_FLOAT16_VALUE(67.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + 
SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(755.00), SIMDE_FLOAT16_VALUE(-288.75), + SIMDE_FLOAT16_VALUE(263.00), SIMDE_FLOAT16_VALUE(-1076.00)}}, + {{SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, + {SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, + {SIMDE_FLOAT16_VALUE(-744.50), SIMDE_FLOAT16_VALUE(-790.00), + SIMDE_FLOAT16_VALUE(-456.50), SIMDE_FLOAT16_VALUE(1250.00)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r = simde_vcadd_rot90_f16(a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcadd_rot90_f16(a, b); @@ -62,77 +84,123 @@ test_simde_vcadd_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcaddq_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcaddq_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75), - SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, - { SIMDE_FLOAT16_VALUE( -936.50), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), - SIMDE_FLOAT16_VALUE( -370.75), 
SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, - { SIMDE_FLOAT16_VALUE(1005.000000), SIMDE_FLOAT16_VALUE(-1864.000000), SIMDE_FLOAT16_VALUE(366.000000), SIMDE_FLOAT16_VALUE(236.750000), - SIMDE_FLOAT16_VALUE(-416.000000), SIMDE_FLOAT16_VALUE(-204.625000), SIMDE_FLOAT16_VALUE(1710.000000), SIMDE_FLOAT16_VALUE(-320.000000) } }, - { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00), - SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, - { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( -666.00), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), - SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, - { SIMDE_FLOAT16_VALUE(917.000000), SIMDE_FLOAT16_VALUE(1107.000000), SIMDE_FLOAT16_VALUE(-444.750000), SIMDE_FLOAT16_VALUE(463.500000), - SIMDE_FLOAT16_VALUE(444.000000), SIMDE_FLOAT16_VALUE(-801.000000), SIMDE_FLOAT16_VALUE(126.000000), SIMDE_FLOAT16_VALUE(-385.000000) } }, - { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50), - SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, - { SIMDE_FLOAT16_VALUE( -111.25), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), - SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, - { SIMDE_FLOAT16_VALUE(913.500000), SIMDE_FLOAT16_VALUE(306.750000), SIMDE_FLOAT16_VALUE(-1846.000000), SIMDE_FLOAT16_VALUE(890.000000), - SIMDE_FLOAT16_VALUE(189.000000), SIMDE_FLOAT16_VALUE(1354.000000), SIMDE_FLOAT16_VALUE(-197.500000), SIMDE_FLOAT16_VALUE(1061.000000) } }, - { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 
385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50), - SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, - { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -677.50), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), - SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, - { SIMDE_FLOAT16_VALUE(1326.000000), SIMDE_FLOAT16_VALUE(717.000000), SIMDE_FLOAT16_VALUE(531.500000), SIMDE_FLOAT16_VALUE(-696.000000), - SIMDE_FLOAT16_VALUE(-1210.000000), SIMDE_FLOAT16_VALUE(484.000000), SIMDE_FLOAT16_VALUE(-598.000000), SIMDE_FLOAT16_VALUE(-657.000000) } }, - { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), - SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, - { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), - SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, - { SIMDE_FLOAT16_VALUE(891.500000), SIMDE_FLOAT16_VALUE(1069.000000), SIMDE_FLOAT16_VALUE(-9.125000), SIMDE_FLOAT16_VALUE(-197.500000), - SIMDE_FLOAT16_VALUE(-370.000000), SIMDE_FLOAT16_VALUE(67.000000), SIMDE_FLOAT16_VALUE(71.750000), SIMDE_FLOAT16_VALUE(-198.750000) } }, - { { SIMDE_FLOAT16_VALUE( -378.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), - SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, - { SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25), - SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), 
SIMDE_FLOAT16_VALUE( 75.88) }, - { SIMDE_FLOAT16_VALUE(596.000000), SIMDE_FLOAT16_VALUE(-1482.000000), SIMDE_FLOAT16_VALUE(-247.750000), SIMDE_FLOAT16_VALUE(-649.000000), - SIMDE_FLOAT16_VALUE(-229.500000), SIMDE_FLOAT16_VALUE(-662.000000), SIMDE_FLOAT16_VALUE(381.500000), SIMDE_FLOAT16_VALUE(27.000000) } }, - { { SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50), - SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 395.50), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, - { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), - SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, - { SIMDE_FLOAT16_VALUE(512.000000), SIMDE_FLOAT16_VALUE(-649.000000), SIMDE_FLOAT16_VALUE(1338.000000), SIMDE_FLOAT16_VALUE(566.500000), - SIMDE_FLOAT16_VALUE(-1637.000000), SIMDE_FLOAT16_VALUE(1226.000000), SIMDE_FLOAT16_VALUE(-992.000000), SIMDE_FLOAT16_VALUE(-1181.000000) } }, - { { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), - SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, - { SIMDE_FLOAT16_VALUE( 274.50), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25), - SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50) }, - { SIMDE_FLOAT16_VALUE(343.000000), SIMDE_FLOAT16_VALUE(896.000000), SIMDE_FLOAT16_VALUE(76.250000), SIMDE_FLOAT16_VALUE(1110.000000), - SIMDE_FLOAT16_VALUE(-503.750000), SIMDE_FLOAT16_VALUE(-938.000000), SIMDE_FLOAT16_VALUE(-1546.000000), SIMDE_FLOAT16_VALUE(-348.000000) } } - }; + {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), + SIMDE_FLOAT16_VALUE(17.94), 
SIMDE_FLOAT16_VALUE(340.75), + SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, + {SIMDE_FLOAT16_VALUE(-936.50), SIMDE_FLOAT16_VALUE(-465.00), + SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), + SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), + SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, + {SIMDE_FLOAT16_VALUE(1005.000000), SIMDE_FLOAT16_VALUE(-1864.000000), + SIMDE_FLOAT16_VALUE(366.000000), SIMDE_FLOAT16_VALUE(236.750000), + SIMDE_FLOAT16_VALUE(-416.000000), SIMDE_FLOAT16_VALUE(-204.625000), + SIMDE_FLOAT16_VALUE(1710.000000), SIMDE_FLOAT16_VALUE(-320.000000)}}, + {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), + SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00), + SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), + SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, + {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(-666.00), + SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), + SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), + SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, + {SIMDE_FLOAT16_VALUE(917.000000), SIMDE_FLOAT16_VALUE(1107.000000), + SIMDE_FLOAT16_VALUE(-444.750000), SIMDE_FLOAT16_VALUE(463.500000), + SIMDE_FLOAT16_VALUE(444.000000), SIMDE_FLOAT16_VALUE(-801.000000), + SIMDE_FLOAT16_VALUE(126.000000), SIMDE_FLOAT16_VALUE(-385.000000)}}, + {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), + SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50), + SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), + SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, + {SIMDE_FLOAT16_VALUE(-111.25), SIMDE_FLOAT16_VALUE(-830.50), + SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), + SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), + SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, + {SIMDE_FLOAT16_VALUE(913.500000), 
SIMDE_FLOAT16_VALUE(306.750000), + SIMDE_FLOAT16_VALUE(-1846.000000), SIMDE_FLOAT16_VALUE(890.000000), + SIMDE_FLOAT16_VALUE(189.000000), SIMDE_FLOAT16_VALUE(1354.000000), + SIMDE_FLOAT16_VALUE(-197.500000), SIMDE_FLOAT16_VALUE(1061.000000)}}, + {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), + SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50), + SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), + SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, + {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-677.50), + SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), + SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), + SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, + {SIMDE_FLOAT16_VALUE(1326.000000), SIMDE_FLOAT16_VALUE(717.000000), + SIMDE_FLOAT16_VALUE(531.500000), SIMDE_FLOAT16_VALUE(-696.000000), + SIMDE_FLOAT16_VALUE(-1210.000000), SIMDE_FLOAT16_VALUE(484.000000), + SIMDE_FLOAT16_VALUE(-598.000000), SIMDE_FLOAT16_VALUE(-657.000000)}}, + {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), + SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), + SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, + {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), + SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), + SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), + SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, + {SIMDE_FLOAT16_VALUE(891.500000), SIMDE_FLOAT16_VALUE(1069.000000), + SIMDE_FLOAT16_VALUE(-9.125000), SIMDE_FLOAT16_VALUE(-197.500000), + SIMDE_FLOAT16_VALUE(-370.000000), SIMDE_FLOAT16_VALUE(67.000000), + SIMDE_FLOAT16_VALUE(71.750000), SIMDE_FLOAT16_VALUE(-198.750000)}}, + {{SIMDE_FLOAT16_VALUE(-378.00), SIMDE_FLOAT16_VALUE(-695.50), + SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), + SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), 
SIMDE_FLOAT16_VALUE(579.00)}, + {SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25), + SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-640.00), + SIMDE_FLOAT16_VALUE(-552.00), SIMDE_FLOAT16_VALUE(75.88)}, + {SIMDE_FLOAT16_VALUE(596.000000), SIMDE_FLOAT16_VALUE(-1482.000000), + SIMDE_FLOAT16_VALUE(-247.750000), SIMDE_FLOAT16_VALUE(-649.000000), + SIMDE_FLOAT16_VALUE(-229.500000), SIMDE_FLOAT16_VALUE(-662.000000), + SIMDE_FLOAT16_VALUE(381.500000), SIMDE_FLOAT16_VALUE(27.000000)}}, + {{SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), + SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(943.50), + SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(395.50), + SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, + {SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), + SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), + SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), + SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, + {SIMDE_FLOAT16_VALUE(512.000000), SIMDE_FLOAT16_VALUE(-649.000000), + SIMDE_FLOAT16_VALUE(1338.000000), SIMDE_FLOAT16_VALUE(566.500000), + SIMDE_FLOAT16_VALUE(-1637.000000), SIMDE_FLOAT16_VALUE(1226.000000), + SIMDE_FLOAT16_VALUE(-992.000000), SIMDE_FLOAT16_VALUE(-1181.000000)}}, + {{SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), + SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), + SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), + SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, + {SIMDE_FLOAT16_VALUE(274.50), SIMDE_FLOAT16_VALUE(192.38), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25), + SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), + SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50)}, + {SIMDE_FLOAT16_VALUE(343.000000), SIMDE_FLOAT16_VALUE(896.000000), + SIMDE_FLOAT16_VALUE(76.250000), SIMDE_FLOAT16_VALUE(1110.000000), + SIMDE_FLOAT16_VALUE(-503.750000), 
SIMDE_FLOAT16_VALUE(-938.000000), + SIMDE_FLOAT16_VALUE(-1546.000000), SIMDE_FLOAT16_VALUE(-348.000000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r = simde_vcaddq_rot90_f16(a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcaddq_rot90_f16(a, b); @@ -145,53 +213,50 @@ test_simde_vcaddq_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcadd_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcadd_rot90_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 863.66), SIMDE_FLOAT32_C( 828.31) }, - { SIMDE_FLOAT32_C( -563.51), SIMDE_FLOAT32_C( -576.51) }, - { SIMDE_FLOAT32_C(1440.169922), SIMDE_FLOAT32_C(264.799988) } }, - { { SIMDE_FLOAT32_C( -703.45), SIMDE_FLOAT32_C( 383.90) }, - { SIMDE_FLOAT32_C( -772.46), SIMDE_FLOAT32_C( 457.40) }, - { SIMDE_FLOAT32_C(-1160.849976), SIMDE_FLOAT32_C(-388.560028) } }, - { { SIMDE_FLOAT32_C( 295.99), SIMDE_FLOAT32_C( 653.10) }, - { SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( 945.50) }, - { SIMDE_FLOAT32_C(-649.510010), SIMDE_FLOAT32_C(532.119995) } }, - { { SIMDE_FLOAT32_C( -280.81), SIMDE_FLOAT32_C( 631.32) }, - { SIMDE_FLOAT32_C( 688.34), SIMDE_FLOAT32_C( 191.95) }, - { SIMDE_FLOAT32_C(-472.760010), SIMDE_FLOAT32_C(1319.660034) } }, - { { SIMDE_FLOAT32_C( -522.88), 
SIMDE_FLOAT32_C( -323.79) }, - { SIMDE_FLOAT32_C( -887.99), SIMDE_FLOAT32_C( -283.70) }, - { SIMDE_FLOAT32_C(-239.179993), SIMDE_FLOAT32_C(-1211.780029) } }, - { { SIMDE_FLOAT32_C( -117.76), SIMDE_FLOAT32_C( -841.45) }, - { SIMDE_FLOAT32_C( 664.94), SIMDE_FLOAT32_C( -987.19) }, - { SIMDE_FLOAT32_C(869.429993), SIMDE_FLOAT32_C(-176.510010) } }, - { { SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( -152.10) }, - { SIMDE_FLOAT32_C( 963.83), SIMDE_FLOAT32_C( 919.89) }, - { SIMDE_FLOAT32_C(-1562.780029), SIMDE_FLOAT32_C(811.729980) } }, - { { SIMDE_FLOAT32_C( 630.40), SIMDE_FLOAT32_C( -669.33) }, - { SIMDE_FLOAT32_C( 671.13), SIMDE_FLOAT32_C( 256.93) }, - { SIMDE_FLOAT32_C(373.470032), SIMDE_FLOAT32_C( 1.799988) } } - }; + {{SIMDE_FLOAT32_C(863.66), SIMDE_FLOAT32_C(828.31)}, + {SIMDE_FLOAT32_C(-563.51), SIMDE_FLOAT32_C(-576.51)}, + {SIMDE_FLOAT32_C(1440.169922), SIMDE_FLOAT32_C(264.799988)}}, + {{SIMDE_FLOAT32_C(-703.45), SIMDE_FLOAT32_C(383.90)}, + {SIMDE_FLOAT32_C(-772.46), SIMDE_FLOAT32_C(457.40)}, + {SIMDE_FLOAT32_C(-1160.849976), SIMDE_FLOAT32_C(-388.560028)}}, + {{SIMDE_FLOAT32_C(295.99), SIMDE_FLOAT32_C(653.10)}, + {SIMDE_FLOAT32_C(-120.98), SIMDE_FLOAT32_C(945.50)}, + {SIMDE_FLOAT32_C(-649.510010), SIMDE_FLOAT32_C(532.119995)}}, + {{SIMDE_FLOAT32_C(-280.81), SIMDE_FLOAT32_C(631.32)}, + {SIMDE_FLOAT32_C(688.34), SIMDE_FLOAT32_C(191.95)}, + {SIMDE_FLOAT32_C(-472.760010), SIMDE_FLOAT32_C(1319.660034)}}, + {{SIMDE_FLOAT32_C(-522.88), SIMDE_FLOAT32_C(-323.79)}, + {SIMDE_FLOAT32_C(-887.99), SIMDE_FLOAT32_C(-283.70)}, + {SIMDE_FLOAT32_C(-239.179993), SIMDE_FLOAT32_C(-1211.780029)}}, + {{SIMDE_FLOAT32_C(-117.76), SIMDE_FLOAT32_C(-841.45)}, + {SIMDE_FLOAT32_C(664.94), SIMDE_FLOAT32_C(-987.19)}, + {SIMDE_FLOAT32_C(869.429993), SIMDE_FLOAT32_C(-176.510010)}}, + {{SIMDE_FLOAT32_C(-642.89), SIMDE_FLOAT32_C(-152.10)}, + {SIMDE_FLOAT32_C(963.83), SIMDE_FLOAT32_C(919.89)}, + {SIMDE_FLOAT32_C(-1562.780029), SIMDE_FLOAT32_C(811.729980)}}, + {{SIMDE_FLOAT32_C(630.40), 
SIMDE_FLOAT32_C(-669.33)}, + {SIMDE_FLOAT32_C(671.13), SIMDE_FLOAT32_C(256.93)}, + {SIMDE_FLOAT32_C(373.470032), SIMDE_FLOAT32_C(1.799988)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcadd_rot90_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - - + return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcadd_rot90_f32(a, b); @@ -204,51 +269,74 @@ test_simde_vcadd_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcaddq_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcaddq_rot90_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -337.31), SIMDE_FLOAT32_C( -857.36), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( -617.33) }, - { SIMDE_FLOAT32_C( -439.38), SIMDE_FLOAT32_C( 245.13), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( 520.69) }, - { SIMDE_FLOAT32_C(-582.440002), SIMDE_FLOAT32_C(-1296.739990), SIMDE_FLOAT32_C(-185.980011), SIMDE_FLOAT32_C(-506.270020) } }, - { { SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 250.19), SIMDE_FLOAT32_C( -679.96), SIMDE_FLOAT32_C( -750.25) }, - { SIMDE_FLOAT32_C( -138.26), SIMDE_FLOAT32_C( -14.62), SIMDE_FLOAT32_C( -921.52), SIMDE_FLOAT32_C( 225.91) }, - { SIMDE_FLOAT32_C(100.110001), SIMDE_FLOAT32_C(111.930008), SIMDE_FLOAT32_C(-905.869995), SIMDE_FLOAT32_C(-1671.770020) } }, - { { SIMDE_FLOAT32_C( 242.83), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 297.95), SIMDE_FLOAT32_C( 105.66) }, - { SIMDE_FLOAT32_C( 
-722.51), SIMDE_FLOAT32_C( -802.37), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( 915.39) }, - { SIMDE_FLOAT32_C(1045.199951), SIMDE_FLOAT32_C(146.770020), SIMDE_FLOAT32_C(-617.440002), SIMDE_FLOAT32_C(-140.119995) } }, - { { SIMDE_FLOAT32_C( 54.20), SIMDE_FLOAT32_C( -928.06), SIMDE_FLOAT32_C( 362.39), SIMDE_FLOAT32_C( -936.63) }, - { SIMDE_FLOAT32_C( 185.82), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 924.66), SIMDE_FLOAT32_C( -643.82) }, - { SIMDE_FLOAT32_C(298.630005), SIMDE_FLOAT32_C(-742.239990), SIMDE_FLOAT32_C(1006.210022), SIMDE_FLOAT32_C(-11.970032) } }, - { { SIMDE_FLOAT32_C( -516.92), SIMDE_FLOAT32_C( -615.16), SIMDE_FLOAT32_C( -751.52), SIMDE_FLOAT32_C( -974.04) }, - { SIMDE_FLOAT32_C( -144.42), SIMDE_FLOAT32_C( 338.27), SIMDE_FLOAT32_C( 704.92), SIMDE_FLOAT32_C( 116.90) }, - { SIMDE_FLOAT32_C(-855.189941), SIMDE_FLOAT32_C(-759.579956), SIMDE_FLOAT32_C(-868.420044), SIMDE_FLOAT32_C(-269.119995) } }, - { { SIMDE_FLOAT32_C( 49.39), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -476.30), SIMDE_FLOAT32_C( 106.71) }, - { SIMDE_FLOAT32_C( -725.84), SIMDE_FLOAT32_C( -353.71), SIMDE_FLOAT32_C( 268.41), SIMDE_FLOAT32_C( 728.83) }, - { SIMDE_FLOAT32_C(403.099976), SIMDE_FLOAT32_C(-1088.840088), SIMDE_FLOAT32_C(-1205.130005), SIMDE_FLOAT32_C(375.119995) } }, - { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87), SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, - { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, - { SIMDE_FLOAT32_C(-1090.540039), SIMDE_FLOAT32_C(-533.799988), SIMDE_FLOAT32_C(400.039978), SIMDE_FLOAT32_C(-1599.100098) } }, - { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, - { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, - { SIMDE_FLOAT32_C(-353.709991), SIMDE_FLOAT32_C(-1358.580078), SIMDE_FLOAT32_C(-895.679993), SIMDE_FLOAT32_C(551.410034) } } - }; + 
{{SIMDE_FLOAT32_C(-337.31), SIMDE_FLOAT32_C(-857.36), + SIMDE_FLOAT32_C(334.71), SIMDE_FLOAT32_C(-617.33)}, + {SIMDE_FLOAT32_C(-439.38), SIMDE_FLOAT32_C(245.13), + SIMDE_FLOAT32_C(111.06), SIMDE_FLOAT32_C(520.69)}, + {SIMDE_FLOAT32_C(-582.440002), SIMDE_FLOAT32_C(-1296.739990), + SIMDE_FLOAT32_C(-185.980011), SIMDE_FLOAT32_C(-506.270020)}}, + {{SIMDE_FLOAT32_C(85.49), SIMDE_FLOAT32_C(250.19), + SIMDE_FLOAT32_C(-679.96), SIMDE_FLOAT32_C(-750.25)}, + {SIMDE_FLOAT32_C(-138.26), SIMDE_FLOAT32_C(-14.62), + SIMDE_FLOAT32_C(-921.52), SIMDE_FLOAT32_C(225.91)}, + {SIMDE_FLOAT32_C(100.110001), SIMDE_FLOAT32_C(111.930008), + SIMDE_FLOAT32_C(-905.869995), SIMDE_FLOAT32_C(-1671.770020)}}, + {{SIMDE_FLOAT32_C(242.83), SIMDE_FLOAT32_C(869.28), + SIMDE_FLOAT32_C(297.95), SIMDE_FLOAT32_C(105.66)}, + {SIMDE_FLOAT32_C(-722.51), SIMDE_FLOAT32_C(-802.37), + SIMDE_FLOAT32_C(-245.78), SIMDE_FLOAT32_C(915.39)}, + {SIMDE_FLOAT32_C(1045.199951), SIMDE_FLOAT32_C(146.770020), + SIMDE_FLOAT32_C(-617.440002), SIMDE_FLOAT32_C(-140.119995)}}, + {{SIMDE_FLOAT32_C(54.20), SIMDE_FLOAT32_C(-928.06), + SIMDE_FLOAT32_C(362.39), SIMDE_FLOAT32_C(-936.63)}, + {SIMDE_FLOAT32_C(185.82), SIMDE_FLOAT32_C(-244.43), + SIMDE_FLOAT32_C(924.66), SIMDE_FLOAT32_C(-643.82)}, + {SIMDE_FLOAT32_C(298.630005), SIMDE_FLOAT32_C(-742.239990), + SIMDE_FLOAT32_C(1006.210022), SIMDE_FLOAT32_C(-11.970032)}}, + {{SIMDE_FLOAT32_C(-516.92), SIMDE_FLOAT32_C(-615.16), + SIMDE_FLOAT32_C(-751.52), SIMDE_FLOAT32_C(-974.04)}, + {SIMDE_FLOAT32_C(-144.42), SIMDE_FLOAT32_C(338.27), + SIMDE_FLOAT32_C(704.92), SIMDE_FLOAT32_C(116.90)}, + {SIMDE_FLOAT32_C(-855.189941), SIMDE_FLOAT32_C(-759.579956), + SIMDE_FLOAT32_C(-868.420044), SIMDE_FLOAT32_C(-269.119995)}}, + {{SIMDE_FLOAT32_C(49.39), SIMDE_FLOAT32_C(-363.00), + SIMDE_FLOAT32_C(-476.30), SIMDE_FLOAT32_C(106.71)}, + {SIMDE_FLOAT32_C(-725.84), SIMDE_FLOAT32_C(-353.71), + SIMDE_FLOAT32_C(268.41), SIMDE_FLOAT32_C(728.83)}, + {SIMDE_FLOAT32_C(403.099976), SIMDE_FLOAT32_C(-1088.840088), + 
SIMDE_FLOAT32_C(-1205.130005), SIMDE_FLOAT32_C(375.119995)}}, + {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87), + SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, + {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), + SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, + {SIMDE_FLOAT32_C(-1090.540039), SIMDE_FLOAT32_C(-533.799988), + SIMDE_FLOAT32_C(400.039978), SIMDE_FLOAT32_C(-1599.100098)}}, + {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94), + SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, + {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), + SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, + {SIMDE_FLOAT32_C(-353.709991), SIMDE_FLOAT32_C(-1358.580078), + SIMDE_FLOAT32_C(-895.679993), SIMDE_FLOAT32_C(551.410034)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vcaddq_rot90_f32(a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcaddq_rot90_f32(a, b); @@ -261,51 +349,50 @@ test_simde_vcaddq_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcaddq_rot90_f64 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcaddq_rot90_f64(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { - { { SIMDE_FLOAT64_C( -30.36), SIMDE_FLOAT64_C( 631.53) }, - { SIMDE_FLOAT64_C( 850.75), SIMDE_FLOAT64_C( -263.55) }, - { 
SIMDE_FLOAT64_C(233.190000), SIMDE_FLOAT64_C(1482.280000) } }, - { { SIMDE_FLOAT64_C( 139.96), SIMDE_FLOAT64_C( 859.14) }, - { SIMDE_FLOAT64_C( -834.47), SIMDE_FLOAT64_C( 216.10) }, - { SIMDE_FLOAT64_C(-76.140000), SIMDE_FLOAT64_C(24.670000) } }, - { { SIMDE_FLOAT64_C( 995.86), SIMDE_FLOAT64_C( 529.74) }, - { SIMDE_FLOAT64_C( 79.08), SIMDE_FLOAT64_C( 947.13) }, - { SIMDE_FLOAT64_C(48.730000), SIMDE_FLOAT64_C(608.820000) } }, - { { SIMDE_FLOAT64_C( 122.02), SIMDE_FLOAT64_C( -250.00) }, - { SIMDE_FLOAT64_C( -361.82), SIMDE_FLOAT64_C( 265.24) }, - { SIMDE_FLOAT64_C(-143.220000), SIMDE_FLOAT64_C(-611.820000) } }, - { { SIMDE_FLOAT64_C( 275.71), SIMDE_FLOAT64_C( 2.71) }, - { SIMDE_FLOAT64_C( 99.79), SIMDE_FLOAT64_C( -137.67) }, - { SIMDE_FLOAT64_C(413.380000), SIMDE_FLOAT64_C(102.500000) } }, - { { SIMDE_FLOAT64_C( -761.19), SIMDE_FLOAT64_C( 813.19) }, - { SIMDE_FLOAT64_C( -897.68), SIMDE_FLOAT64_C( 653.58) }, - { SIMDE_FLOAT64_C(-1414.770000), SIMDE_FLOAT64_C(-84.490000) } }, - { { SIMDE_FLOAT64_C( 396.02), SIMDE_FLOAT64_C( 413.06) }, - { SIMDE_FLOAT64_C( 514.09), SIMDE_FLOAT64_C( -977.67) }, - { SIMDE_FLOAT64_C(1373.690000), SIMDE_FLOAT64_C(927.150000) } }, - { { SIMDE_FLOAT64_C( -671.79), SIMDE_FLOAT64_C( -92.13) }, - { SIMDE_FLOAT64_C( -441.32), SIMDE_FLOAT64_C( -374.27) }, - { SIMDE_FLOAT64_C(-297.520000), SIMDE_FLOAT64_C(-533.450000) } } - }; + {{SIMDE_FLOAT64_C(-30.36), SIMDE_FLOAT64_C(631.53)}, + {SIMDE_FLOAT64_C(850.75), SIMDE_FLOAT64_C(-263.55)}, + {SIMDE_FLOAT64_C(233.190000), SIMDE_FLOAT64_C(1482.280000)}}, + {{SIMDE_FLOAT64_C(139.96), SIMDE_FLOAT64_C(859.14)}, + {SIMDE_FLOAT64_C(-834.47), SIMDE_FLOAT64_C(216.10)}, + {SIMDE_FLOAT64_C(-76.140000), SIMDE_FLOAT64_C(24.670000)}}, + {{SIMDE_FLOAT64_C(995.86), SIMDE_FLOAT64_C(529.74)}, + {SIMDE_FLOAT64_C(79.08), SIMDE_FLOAT64_C(947.13)}, + {SIMDE_FLOAT64_C(48.730000), SIMDE_FLOAT64_C(608.820000)}}, + {{SIMDE_FLOAT64_C(122.02), SIMDE_FLOAT64_C(-250.00)}, + {SIMDE_FLOAT64_C(-361.82), SIMDE_FLOAT64_C(265.24)}, + 
{SIMDE_FLOAT64_C(-143.220000), SIMDE_FLOAT64_C(-611.820000)}}, + {{SIMDE_FLOAT64_C(275.71), SIMDE_FLOAT64_C(2.71)}, + {SIMDE_FLOAT64_C(99.79), SIMDE_FLOAT64_C(-137.67)}, + {SIMDE_FLOAT64_C(413.380000), SIMDE_FLOAT64_C(102.500000)}}, + {{SIMDE_FLOAT64_C(-761.19), SIMDE_FLOAT64_C(813.19)}, + {SIMDE_FLOAT64_C(-897.68), SIMDE_FLOAT64_C(653.58)}, + {SIMDE_FLOAT64_C(-1414.770000), SIMDE_FLOAT64_C(-84.490000)}}, + {{SIMDE_FLOAT64_C(396.02), SIMDE_FLOAT64_C(413.06)}, + {SIMDE_FLOAT64_C(514.09), SIMDE_FLOAT64_C(-977.67)}, + {SIMDE_FLOAT64_C(1373.690000), SIMDE_FLOAT64_C(927.150000)}}, + {{SIMDE_FLOAT64_C(-671.79), SIMDE_FLOAT64_C(-92.13)}, + {SIMDE_FLOAT64_C(-441.32), SIMDE_FLOAT64_C(-374.27)}, + {SIMDE_FLOAT64_C(-297.520000), SIMDE_FLOAT64_C(-533.450000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vcaddq_rot90_f64(a, b); - simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - for (int i = 0 ; i < 8 ; i++) { + for (int i = 0; i < 8; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t r = simde_vcaddq_rot90_f64(a, b); diff --git a/test/arm/neon/cmla_lane.c b/test/arm/neon/cmla_lane.c index eea427b8f..41e4af538 100644 --- a/test/arm/neon/cmla_lane.c +++ b/test/arm/neon/cmla_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_lane -#include "test-neon.h" #include "../../../simde/arm/neon/cmla_lane.h" + #include "../../../simde/arm/neon/dup_n.h" +#include "test-neon.h" -static int -test_simde_vcmla_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int 
test_simde_vcmla_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ -14,71 +14,106 @@ test_simde_vcmla_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, - { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-7724.000000), SIMDE_FLOAT16_VALUE(-7784.000000), SIMDE_FLOAT16_VALUE(-10416.000000), SIMDE_FLOAT16_VALUE(-10352.000000) } }, - { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(48000.000000), SIMDE_FLOAT16_VALUE(47392.000000), SIMDE_FLOAT16_VALUE(-22592.000000), SIMDE_FLOAT16_VALUE(-21312.000000) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-51488.000000), SIMDE_FLOAT16_VALUE(-51680.000000), SIMDE_FLOAT16_VALUE(48192.000000), SIMDE_FLOAT16_VALUE(46528.000000) } }, - { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( 
-180.50) }, - { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-37536.000000), SIMDE_FLOAT16_VALUE(-37824.000000), SIMDE_FLOAT16_VALUE(23584.000000), SIMDE_FLOAT16_VALUE(23552.000000) } }, - { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, - { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, - { SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-34432.000000), SIMDE_FLOAT16_VALUE(-34624.000000), SIMDE_FLOAT16_VALUE(-51520.000000), SIMDE_FLOAT16_VALUE(-51392.000000) } }, - { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, - { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, - { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(243.125000), SIMDE_FLOAT16_VALUE(199.000000), SIMDE_FLOAT16_VALUE(48928.000000), SIMDE_FLOAT16_VALUE(49248.000000) } }, - { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, - { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-35520.000000), SIMDE_FLOAT16_VALUE(-35968.000000), 
SIMDE_FLOAT16_VALUE(-9888.000000), SIMDE_FLOAT16_VALUE(-9928.000000) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, - { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, - { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-18624.000000), SIMDE_FLOAT16_VALUE(-18464.000000), SIMDE_FLOAT16_VALUE(-13800.000000), SIMDE_FLOAT16_VALUE(-13680.000000) } } - + {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), + SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, + {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), + SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-7724.000000), SIMDE_FLOAT16_VALUE(-7784.000000), + SIMDE_FLOAT16_VALUE(-10416.000000), + SIMDE_FLOAT16_VALUE(-10352.000000)}}, + {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(48000.000000), SIMDE_FLOAT16_VALUE(47392.000000), + SIMDE_FLOAT16_VALUE(-22592.000000), + SIMDE_FLOAT16_VALUE(-21312.000000)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-361.00), 
SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-51488.000000), SIMDE_FLOAT16_VALUE(-51680.000000), + SIMDE_FLOAT16_VALUE(48192.000000), SIMDE_FLOAT16_VALUE(46528.000000)}}, + {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), + SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, + {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), + SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), + SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-37536.000000), SIMDE_FLOAT16_VALUE(-37824.000000), + SIMDE_FLOAT16_VALUE(23584.000000), SIMDE_FLOAT16_VALUE(23552.000000)}}, + {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), + SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, + {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), + SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, + {SIMDE_FLOAT16_VALUE(-180.50), SIMDE_FLOAT16_VALUE(375.50), + SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-34432.000000), SIMDE_FLOAT16_VALUE(-34624.000000), + SIMDE_FLOAT16_VALUE(-51520.000000), + SIMDE_FLOAT16_VALUE(-51392.000000)}}, + {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, + {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, + {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), + SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(243.125000), SIMDE_FLOAT16_VALUE(199.000000), + SIMDE_FLOAT16_VALUE(48928.000000), SIMDE_FLOAT16_VALUE(49248.000000)}}, + {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, + {SIMDE_FLOAT16_VALUE(-397.75), 
SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, + {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-35520.000000), SIMDE_FLOAT16_VALUE(-35968.000000), + SIMDE_FLOAT16_VALUE(-9888.000000), SIMDE_FLOAT16_VALUE(-9928.000000)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, + {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, + {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), + SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-18624.000000), SIMDE_FLOAT16_VALUE(-18464.000000), + SIMDE_FLOAT16_VALUE(-13800.000000), SIMDE_FLOAT16_VALUE(-13680.000000)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = 
simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -89,8 +124,7 @@ test_simde_vcmla_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -99,49 +133,48 @@ test_simde_vcmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, - { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, - { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(554878.125000), SIMDE_FLOAT32_C(555212.812500) } }, - { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, - { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, - { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-7536.677246), SIMDE_FLOAT32_C(-5996.586914) } }, - { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, - { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, - { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(267362.687500), SIMDE_FLOAT32_C(266017.968750) } }, - { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, - { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, - { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-169232.828125), SIMDE_FLOAT32_C(-170505.734375) } }, - { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, - { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, - { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(23602.720703), 
SIMDE_FLOAT32_C(22593.902344) } }, - { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, - { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, - { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(40592.113281), SIMDE_FLOAT32_C(41962.363281) } }, - { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, - { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, - { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-346414.437500), SIMDE_FLOAT32_C(-345866.750000) } }, - { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, - { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, - { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-120500.015625), SIMDE_FLOAT32_C(-119691.234375) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, + {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, + {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, + INT32_C(0), + {SIMDE_FLOAT32_C(554878.125000), SIMDE_FLOAT32_C(555212.812500)}}, + {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, + {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, + {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-7536.677246), SIMDE_FLOAT32_C(-5996.586914)}}, + {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, + {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, + {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, + INT32_C(0), + {SIMDE_FLOAT32_C(267362.687500), SIMDE_FLOAT32_C(266017.968750)}}, + {{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, + {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, + {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-169232.828125), SIMDE_FLOAT32_C(-170505.734375)}}, + {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, + {SIMDE_FLOAT32_C(97.30), 
SIMDE_FLOAT32_C(-52.10)}, + {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, + INT32_C(0), + {SIMDE_FLOAT32_C(23602.720703), SIMDE_FLOAT32_C(22593.902344)}}, + {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, + {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, + {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, + INT32_C(0), + {SIMDE_FLOAT32_C(40592.113281), SIMDE_FLOAT32_C(41962.363281)}}, + {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, + {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, + {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-346414.437500), SIMDE_FLOAT32_C(-345866.750000)}}, + {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, + {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, + {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-120500.015625), SIMDE_FLOAT32_C(-119691.234375)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); @@ -149,18 +182,17 @@ test_simde_vcmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -171,9 +203,7 @@ test_simde_vcmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } - -static int -test_simde_vcmla_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ -182,76 +212,120 @@ test_simde_vcmla_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, - { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), - SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-7264.000000), SIMDE_FLOAT16_VALUE(-7308.000000), SIMDE_FLOAT16_VALUE(4584.000000), SIMDE_FLOAT16_VALUE(5504.000000) } }, - { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), - SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(87.312500), SIMDE_FLOAT16_VALUE(-319.750000), SIMDE_FLOAT16_VALUE(3616.000000), SIMDE_FLOAT16_VALUE(4476.000000) } }, - { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, - { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), 
SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, - { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), - SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-5340.000000), SIMDE_FLOAT16_VALUE(-5256.000000), SIMDE_FLOAT16_VALUE(10224.000000), SIMDE_FLOAT16_VALUE(9984.000000) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), - SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(26144.000000), SIMDE_FLOAT16_VALUE(25920.000000), SIMDE_FLOAT16_VALUE(-23680.000000), SIMDE_FLOAT16_VALUE(-25360.000000) } }, - { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, - { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), - SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(21776.000000), SIMDE_FLOAT16_VALUE(20304.000000), SIMDE_FLOAT16_VALUE(-45568.000000), SIMDE_FLOAT16_VALUE(-45248.000000) } }, - { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, - { SIMDE_FLOAT16_VALUE( 818.50), 
SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, - { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), - SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(48256.000000), SIMDE_FLOAT16_VALUE(48544.000000), SIMDE_FLOAT16_VALUE(32704.000000), SIMDE_FLOAT16_VALUE(32352.000000) } }, - { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, - { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, - { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), - SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(19840.000000), SIMDE_FLOAT16_VALUE(20176.000000), SIMDE_FLOAT16_VALUE(26880.000000), SIMDE_FLOAT16_VALUE(28592.000000) } }, - { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, - { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, - { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), - SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(56384.000000), SIMDE_FLOAT16_VALUE(56096.000000), SIMDE_FLOAT16_VALUE(-16768.000000), SIMDE_FLOAT16_VALUE(-18048.000000) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), + 
SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, + {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), + SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), + SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), + SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-7264.000000), SIMDE_FLOAT16_VALUE(-7308.000000), + SIMDE_FLOAT16_VALUE(4584.000000), SIMDE_FLOAT16_VALUE(5504.000000)}}, + {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), + SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), + SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), + SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(87.312500), SIMDE_FLOAT16_VALUE(-319.750000), + SIMDE_FLOAT16_VALUE(3616.000000), SIMDE_FLOAT16_VALUE(4476.000000)}}, + {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), + SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, + {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), + SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, + {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), + SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), + SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), + SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-5340.000000), SIMDE_FLOAT16_VALUE(-5256.000000), + SIMDE_FLOAT16_VALUE(10224.000000), SIMDE_FLOAT16_VALUE(9984.000000)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), 
SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), + SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(26144.000000), SIMDE_FLOAT16_VALUE(25920.000000), + SIMDE_FLOAT16_VALUE(-23680.000000), + SIMDE_FLOAT16_VALUE(-25360.000000)}}, + {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), + SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, + {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), + SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), + SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), + SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(21776.000000), SIMDE_FLOAT16_VALUE(20304.000000), + SIMDE_FLOAT16_VALUE(-45568.000000), + SIMDE_FLOAT16_VALUE(-45248.000000)}}, + {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), + SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, + {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), + SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, + {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), + SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), + SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), + SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(48256.000000), SIMDE_FLOAT16_VALUE(48544.000000), + SIMDE_FLOAT16_VALUE(32704.000000), SIMDE_FLOAT16_VALUE(32352.000000)}}, + {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), + SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, + 
{SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), + SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, + {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), + SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), + SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), + SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(19840.000000), SIMDE_FLOAT16_VALUE(20176.000000), + SIMDE_FLOAT16_VALUE(26880.000000), SIMDE_FLOAT16_VALUE(28592.000000)}}, + {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), + SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, + {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), + SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, + {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), + SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), + SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(56384.000000), SIMDE_FLOAT16_VALUE(56096.000000), + SIMDE_FLOAT16_VALUE(-16768.000000), + SIMDE_FLOAT16_VALUE(-18048.000000)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i 
= 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -262,8 +336,7 @@ test_simde_vcmla_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -272,69 +345,78 @@ test_simde_vcmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, - { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, - { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-44964.726562), SIMDE_FLOAT32_C(-44412.597656) } }, - { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, - { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, - { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-6814.092285), SIMDE_FLOAT32_C(-7088.232422) } }, - { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, - { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, - { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(119040.617188), SIMDE_FLOAT32_C(119702.507812) } }, - { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, - { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, - { SIMDE_FLOAT32_C( 
122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-18774.140625), SIMDE_FLOAT32_C(-19240.259766) } }, - { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, - { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, - { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-75683.437500), SIMDE_FLOAT32_C(-75956.437500) } }, - { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, - { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, - { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-46967.093750), SIMDE_FLOAT32_C(-46950.054688) } }, - { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, - { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, - { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-145833.875000), SIMDE_FLOAT32_C(-145761.453125) } }, - { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, - { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, - { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(8569.627930), SIMDE_FLOAT32_C(8744.038086) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, + {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, + {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), + SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-44964.726562), SIMDE_FLOAT32_C(-44412.597656)}}, + {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, + {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, + {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), + 
SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-6814.092285), SIMDE_FLOAT32_C(-7088.232422)}}, + {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, + {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, + {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), + SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, + INT32_C(0), + {SIMDE_FLOAT32_C(119040.617188), SIMDE_FLOAT32_C(119702.507812)}}, + {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, + {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, + {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), + SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-18774.140625), SIMDE_FLOAT32_C(-19240.259766)}}, + {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, + {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, + {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), + SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-75683.437500), SIMDE_FLOAT32_C(-75956.437500)}}, + {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, + {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, + {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), + SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-46967.093750), SIMDE_FLOAT32_C(-46950.054688)}}, + {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, + {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, + {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), + SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-145833.875000), SIMDE_FLOAT32_C(-145761.453125)}}, + {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, + {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, + {SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), + SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, + INT32_C(1), + {SIMDE_FLOAT32_C(8569.627930), SIMDE_FLOAT32_C(8744.038086)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); 
i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -345,8 +427,7 @@ test_simde_vcmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -355,96 +436,155 @@ test_simde_vcmlaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - - { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), - SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, - { SIMDE_FLOAT16_VALUE( 890.50), 
SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), - SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-43648.000000), SIMDE_FLOAT16_VALUE(-43712.000000), SIMDE_FLOAT16_VALUE(30640.000000), SIMDE_FLOAT16_VALUE(30880.000000), - SIMDE_FLOAT16_VALUE(-11448.000000), SIMDE_FLOAT16_VALUE(-10904.000000), SIMDE_FLOAT16_VALUE(26688.000000), SIMDE_FLOAT16_VALUE(27424.000000) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), - SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, - { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88), - SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50) }, - { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(3430.000000), SIMDE_FLOAT16_VALUE(3588.000000), SIMDE_FLOAT16_VALUE(-48928.000000), SIMDE_FLOAT16_VALUE(-48800.000000), - SIMDE_FLOAT16_VALUE(30720.000000), SIMDE_FLOAT16_VALUE(30528.000000), SIMDE_FLOAT16_VALUE(42848.000000), SIMDE_FLOAT16_VALUE(43776.000000) } }, - { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), - SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, - { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), - SIMDE_FLOAT16_VALUE( 
-574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(16480.000000), SIMDE_FLOAT16_VALUE(17296.000000), SIMDE_FLOAT16_VALUE(18480.000000), SIMDE_FLOAT16_VALUE(18000.000000), - SIMDE_FLOAT16_VALUE(-17888.000000), SIMDE_FLOAT16_VALUE(-18064.000000), SIMDE_FLOAT16_VALUE(-24672.000000), SIMDE_FLOAT16_VALUE(-23072.000000) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), - SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, - { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), - SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, - { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(21504.000000), SIMDE_FLOAT16_VALUE(22064.000000), SIMDE_FLOAT16_VALUE(23696.000000), SIMDE_FLOAT16_VALUE(25104.000000), - SIMDE_FLOAT16_VALUE(-8448.000000), SIMDE_FLOAT16_VALUE(-8480.000000), SIMDE_FLOAT16_VALUE(3242.000000), SIMDE_FLOAT16_VALUE(3230.000000) } }, - { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), - SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, - { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), - SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, - { 
SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-7092.000000), SIMDE_FLOAT16_VALUE(-6896.000000), SIMDE_FLOAT16_VALUE(8960.000000), SIMDE_FLOAT16_VALUE(9568.000000), - SIMDE_FLOAT16_VALUE(6324.000000), SIMDE_FLOAT16_VALUE(5600.000000), SIMDE_FLOAT16_VALUE(7520.000000), SIMDE_FLOAT16_VALUE(5868.000000) } }, - { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), - SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, - { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), - SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, - { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(770.000000), SIMDE_FLOAT16_VALUE(1455.000000), SIMDE_FLOAT16_VALUE(8176.000000), SIMDE_FLOAT16_VALUE(8584.000000), - SIMDE_FLOAT16_VALUE(-2040.000000), SIMDE_FLOAT16_VALUE(-1875.000000), SIMDE_FLOAT16_VALUE(-3288.000000), SIMDE_FLOAT16_VALUE(-3358.000000) } }, - { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), - SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, - { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), - SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, - { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, - INT32_C( 0), 
- { SIMDE_FLOAT16_VALUE(-25472.000000), SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-36736.000000), SIMDE_FLOAT16_VALUE(-38048.000000), - SIMDE_FLOAT16_VALUE(-6944.000000), SIMDE_FLOAT16_VALUE(-7372.000000), SIMDE_FLOAT16_VALUE(35744.000000), SIMDE_FLOAT16_VALUE(34176.000000) } }, - { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), - SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, - { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), - SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, - { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-14208.000000), SIMDE_FLOAT16_VALUE(-13840.000000), SIMDE_FLOAT16_VALUE(-13016.000000), SIMDE_FLOAT16_VALUE(-12560.000000), - SIMDE_FLOAT16_VALUE(-7736.000000), SIMDE_FLOAT16_VALUE(-7184.000000), SIMDE_FLOAT16_VALUE(-6408.000000), SIMDE_FLOAT16_VALUE(-7472.000000) } } - + + {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), + SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), + SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, + {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), + SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), + SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), + SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, + {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), + SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-43648.000000), SIMDE_FLOAT16_VALUE(-43712.000000), + SIMDE_FLOAT16_VALUE(30640.000000), 
SIMDE_FLOAT16_VALUE(30880.000000), + SIMDE_FLOAT16_VALUE(-11448.000000), SIMDE_FLOAT16_VALUE(-10904.000000), + SIMDE_FLOAT16_VALUE(26688.000000), SIMDE_FLOAT16_VALUE(27424.000000)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), + SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, + {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-640.00), + SIMDE_FLOAT16_VALUE(-552.00), SIMDE_FLOAT16_VALUE(75.88), + SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), + SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(943.50)}, + {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), + SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(3430.000000), SIMDE_FLOAT16_VALUE(3588.000000), + SIMDE_FLOAT16_VALUE(-48928.000000), SIMDE_FLOAT16_VALUE(-48800.000000), + SIMDE_FLOAT16_VALUE(30720.000000), SIMDE_FLOAT16_VALUE(30528.000000), + SIMDE_FLOAT16_VALUE(42848.000000), SIMDE_FLOAT16_VALUE(43776.000000)}}, + {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), + SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), + SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), + SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, + {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), + SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), + SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), + SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(16480.000000), SIMDE_FLOAT16_VALUE(17296.000000), + SIMDE_FLOAT16_VALUE(18480.000000), SIMDE_FLOAT16_VALUE(18000.000000), + SIMDE_FLOAT16_VALUE(-17888.000000), SIMDE_FLOAT16_VALUE(-18064.000000), + SIMDE_FLOAT16_VALUE(-24672.000000), + 
SIMDE_FLOAT16_VALUE(-23072.000000)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), + SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), + SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), + SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, + {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), + SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), + SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), + SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, + {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), + SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(21504.000000), SIMDE_FLOAT16_VALUE(22064.000000), + SIMDE_FLOAT16_VALUE(23696.000000), SIMDE_FLOAT16_VALUE(25104.000000), + SIMDE_FLOAT16_VALUE(-8448.000000), SIMDE_FLOAT16_VALUE(-8480.000000), + SIMDE_FLOAT16_VALUE(3242.000000), SIMDE_FLOAT16_VALUE(3230.000000)}}, + {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), + SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), + SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), + SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, + {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), + SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), + SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), + SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, + {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), + SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-7092.000000), SIMDE_FLOAT16_VALUE(-6896.000000), + SIMDE_FLOAT16_VALUE(8960.000000), SIMDE_FLOAT16_VALUE(9568.000000), + SIMDE_FLOAT16_VALUE(6324.000000), SIMDE_FLOAT16_VALUE(5600.000000), + SIMDE_FLOAT16_VALUE(7520.000000), SIMDE_FLOAT16_VALUE(5868.000000)}}, + {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), + SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), + 
SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), + SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, + {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), + SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), + SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), + SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, + {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), + SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(770.000000), SIMDE_FLOAT16_VALUE(1455.000000), + SIMDE_FLOAT16_VALUE(8176.000000), SIMDE_FLOAT16_VALUE(8584.000000), + SIMDE_FLOAT16_VALUE(-2040.000000), SIMDE_FLOAT16_VALUE(-1875.000000), + SIMDE_FLOAT16_VALUE(-3288.000000), SIMDE_FLOAT16_VALUE(-3358.000000)}}, + {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), + SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), + SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), + SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, + {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), + SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, + {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), + SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-25472.000000), SIMDE_FLOAT16_VALUE(-26448.000000), + SIMDE_FLOAT16_VALUE(-36736.000000), SIMDE_FLOAT16_VALUE(-38048.000000), + SIMDE_FLOAT16_VALUE(-6944.000000), SIMDE_FLOAT16_VALUE(-7372.000000), + SIMDE_FLOAT16_VALUE(35744.000000), SIMDE_FLOAT16_VALUE(34176.000000)}}, + {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), + SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), + SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), + SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, + {SIMDE_FLOAT16_VALUE(-615.50), 
SIMDE_FLOAT16_VALUE(620.50), + SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), + SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), + SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, + {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), + SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-14208.000000), SIMDE_FLOAT16_VALUE(-13840.000000), + SIMDE_FLOAT16_VALUE(-13016.000000), SIMDE_FLOAT16_VALUE(-12560.000000), + SIMDE_FLOAT16_VALUE(-7736.000000), SIMDE_FLOAT16_VALUE(-7184.000000), + SIMDE_FLOAT16_VALUE(-6408.000000), SIMDE_FLOAT16_VALUE(-7472.000000)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - + SIMDE_CONSTIFY_2_( + simde_vcmlaq_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -455,8 +595,7 @@ test_simde_vcmlaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -465,68 +604,91 @@ test_simde_vcmlaq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, - { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, - { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-249748.578125), SIMDE_FLOAT32_C(-249559.515625), SIMDE_FLOAT32_C(640137.687500), SIMDE_FLOAT32_C(640144.187500) } }, - { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, - { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, - { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-78323.289062), SIMDE_FLOAT32_C(-78133.671875), SIMDE_FLOAT32_C(-107301.625000), SIMDE_FLOAT32_C(-108250.398438) } }, - { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, - { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, - { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-254237.640625), SIMDE_FLOAT32_C(-254599.218750), SIMDE_FLOAT32_C(541767.562500), SIMDE_FLOAT32_C(541224.875000) } }, - { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, - { SIMDE_FLOAT32_C( 
-359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, - { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-66048.968750), SIMDE_FLOAT32_C(-65950.062500), SIMDE_FLOAT32_C(93407.554688), SIMDE_FLOAT32_C(92652.742188) } }, - { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, - { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, - { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(16991.468750), SIMDE_FLOAT32_C(15174.667969), SIMDE_FLOAT32_C(10405.092773), SIMDE_FLOAT32_C(10204.472656) } }, - { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, - { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, - { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(93642.765625), SIMDE_FLOAT32_C(94834.125000), SIMDE_FLOAT32_C(-242623.015625), SIMDE_FLOAT32_C(-242803.562500) } }, - { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, - { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, - { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(138038.406250), SIMDE_FLOAT32_C(139079.562500), SIMDE_FLOAT32_C(-219419.500000), SIMDE_FLOAT32_C(-218382.046875) } }, - { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, - { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, - { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-139701.843750), SIMDE_FLOAT32_C(-138290.437500), SIMDE_FLOAT32_C(-32857.097656), 
SIMDE_FLOAT32_C(-31991.136719) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), + SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, + {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), + SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, + {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-249748.578125), SIMDE_FLOAT32_C(-249559.515625), + SIMDE_FLOAT32_C(640137.687500), SIMDE_FLOAT32_C(640144.187500)}}, + {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), + SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, + {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), + SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, + {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-78323.289062), SIMDE_FLOAT32_C(-78133.671875), + SIMDE_FLOAT32_C(-107301.625000), SIMDE_FLOAT32_C(-108250.398438)}}, + {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), + SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, + {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), + SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, + {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-254237.640625), SIMDE_FLOAT32_C(-254599.218750), + SIMDE_FLOAT32_C(541767.562500), SIMDE_FLOAT32_C(541224.875000)}}, + {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), + SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, + {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), + SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, + {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-66048.968750), SIMDE_FLOAT32_C(-65950.062500), + SIMDE_FLOAT32_C(93407.554688), SIMDE_FLOAT32_C(92652.742188)}}, + {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), + SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, + {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), + SIMDE_FLOAT32_C(358.59), 
SIMDE_FLOAT32_C(350.56)}, + {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, + INT32_C(0), + {SIMDE_FLOAT32_C(16991.468750), SIMDE_FLOAT32_C(15174.667969), + SIMDE_FLOAT32_C(10405.092773), SIMDE_FLOAT32_C(10204.472656)}}, + {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), + SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, + {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), + SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, + {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, + INT32_C(0), + {SIMDE_FLOAT32_C(93642.765625), SIMDE_FLOAT32_C(94834.125000), + SIMDE_FLOAT32_C(-242623.015625), SIMDE_FLOAT32_C(-242803.562500)}}, + {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), + SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, + {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), + SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, + {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, + INT32_C(0), + {SIMDE_FLOAT32_C(138038.406250), SIMDE_FLOAT32_C(139079.562500), + SIMDE_FLOAT32_C(-219419.500000), SIMDE_FLOAT32_C(-218382.046875)}}, + {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), + SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, + {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), + SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, + {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-139701.843750), SIMDE_FLOAT32_C(-138290.437500), + SIMDE_FLOAT32_C(-32857.097656), SIMDE_FLOAT32_C(-31991.136719)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); // simde_test_arm_neon_write_f32x4(2, r, 
SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -537,8 +699,7 @@ test_simde_vcmlaq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -547,103 +708,173 @@ test_simde_vcmlaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), - SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, - { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), - SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, - { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), - SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(58400.000000), SIMDE_FLOAT16_VALUE(59040.000000), SIMDE_FLOAT16_VALUE(5488.000000), 
SIMDE_FLOAT16_VALUE(4376.000000), - SIMDE_FLOAT16_VALUE(7296.000000), SIMDE_FLOAT16_VALUE(8016.000000), SIMDE_FLOAT16_VALUE(-22048.000000), SIMDE_FLOAT16_VALUE(-21008.000000) } }, - { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), - SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, - { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), - SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 784.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -896.00) }, - { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), - SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-11536.000000), SIMDE_FLOAT16_VALUE(-11520.000000), SIMDE_FLOAT16_VALUE(-16336.000000), SIMDE_FLOAT16_VALUE(-17824.000000), - SIMDE_FLOAT16_VALUE(52640.000000), SIMDE_FLOAT16_VALUE(53216.000000), SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(38336.000000) } }, - { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), - SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, - { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), - SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, - { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), - SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) 
}, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE(-16224.000000), SIMDE_FLOAT16_VALUE(-15608.000000), SIMDE_FLOAT16_VALUE(29552.000000), SIMDE_FLOAT16_VALUE(29264.000000), - SIMDE_FLOAT16_VALUE(-26304.000000), SIMDE_FLOAT16_VALUE(-25360.000000), SIMDE_FLOAT16_VALUE(7980.000000), SIMDE_FLOAT16_VALUE(6856.000000) } }, - { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), - SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, - { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), - SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, - { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), - SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE(29904.000000), SIMDE_FLOAT16_VALUE(30496.000000), SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-26512.000000), - SIMDE_FLOAT16_VALUE(-12880.000000), SIMDE_FLOAT16_VALUE(-11296.000000), SIMDE_FLOAT16_VALUE(19456.000000), SIMDE_FLOAT16_VALUE(18704.000000) } }, - { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), - SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, - { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), - SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, - { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( 
-503.00), - SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(2372.000000), SIMDE_FLOAT16_VALUE(1167.000000), SIMDE_FLOAT16_VALUE(18384.000000), SIMDE_FLOAT16_VALUE(17392.000000), - SIMDE_FLOAT16_VALUE(4904.000000), SIMDE_FLOAT16_VALUE(3678.000000), SIMDE_FLOAT16_VALUE(-13288.000000), SIMDE_FLOAT16_VALUE(-13520.000000) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), - SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, - { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), - SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, - { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), - SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-7536.000000), SIMDE_FLOAT16_VALUE(-6752.000000), SIMDE_FLOAT16_VALUE(11696.000000), SIMDE_FLOAT16_VALUE(10864.000000), - SIMDE_FLOAT16_VALUE(-734.500000), SIMDE_FLOAT16_VALUE(599.500000), SIMDE_FLOAT16_VALUE(-12600.000000), SIMDE_FLOAT16_VALUE(-12016.000000) } }, - { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), - SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, - { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), - SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) 
}, - { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), - SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE(-21712.000000), SIMDE_FLOAT16_VALUE(-21360.000000), SIMDE_FLOAT16_VALUE(8288.000000), SIMDE_FLOAT16_VALUE(8456.000000), - SIMDE_FLOAT16_VALUE(-8084.000000), SIMDE_FLOAT16_VALUE(-7528.000000), SIMDE_FLOAT16_VALUE(19152.000000), SIMDE_FLOAT16_VALUE(18944.000000) } }, - { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), - SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, - { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), - SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, - { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), - SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE(-13672.000000), SIMDE_FLOAT16_VALUE(-13552.000000), SIMDE_FLOAT16_VALUE(-18768.000000), SIMDE_FLOAT16_VALUE(-19184.000000), - SIMDE_FLOAT16_VALUE(39072.000000), SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(-23568.000000), SIMDE_FLOAT16_VALUE(-22672.000000) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), + SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), + SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), + SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, + {SIMDE_FLOAT16_VALUE(996.00), 
SIMDE_FLOAT16_VALUE(529.50), + SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), + SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), + SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, + {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), + SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), + SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), + SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(58400.000000), SIMDE_FLOAT16_VALUE(59040.000000), + SIMDE_FLOAT16_VALUE(5488.000000), SIMDE_FLOAT16_VALUE(4376.000000), + SIMDE_FLOAT16_VALUE(7296.000000), SIMDE_FLOAT16_VALUE(8016.000000), + SIMDE_FLOAT16_VALUE(-22048.000000), + SIMDE_FLOAT16_VALUE(-21008.000000)}}, + {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), + SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), + SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), + SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, + {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), + SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(784.00), + SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-896.00)}, + {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), + SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), + SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), + SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-11536.000000), SIMDE_FLOAT16_VALUE(-11520.000000), + SIMDE_FLOAT16_VALUE(-16336.000000), SIMDE_FLOAT16_VALUE(-17824.000000), + SIMDE_FLOAT16_VALUE(52640.000000), SIMDE_FLOAT16_VALUE(53216.000000), + SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(38336.000000)}}, + {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), + SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), + SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), + 
SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, + {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), + SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), + SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), + SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, + {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), + SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), + SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), + SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(-16224.000000), SIMDE_FLOAT16_VALUE(-15608.000000), + SIMDE_FLOAT16_VALUE(29552.000000), SIMDE_FLOAT16_VALUE(29264.000000), + SIMDE_FLOAT16_VALUE(-26304.000000), SIMDE_FLOAT16_VALUE(-25360.000000), + SIMDE_FLOAT16_VALUE(7980.000000), SIMDE_FLOAT16_VALUE(6856.000000)}}, + {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), + SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), + SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), + SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, + {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), + SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), + SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), + SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, + {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), + SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), + SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), + SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(29904.000000), SIMDE_FLOAT16_VALUE(30496.000000), + SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-26512.000000), + SIMDE_FLOAT16_VALUE(-12880.000000), SIMDE_FLOAT16_VALUE(-11296.000000), + SIMDE_FLOAT16_VALUE(19456.000000), SIMDE_FLOAT16_VALUE(18704.000000)}}, + {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), + SIMDE_FLOAT16_VALUE(491.50), 
SIMDE_FLOAT16_VALUE(-505.00), + SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), + SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, + {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), + SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), + SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), + SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, + {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), + SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), + SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), + SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(2372.000000), SIMDE_FLOAT16_VALUE(1167.000000), + SIMDE_FLOAT16_VALUE(18384.000000), SIMDE_FLOAT16_VALUE(17392.000000), + SIMDE_FLOAT16_VALUE(4904.000000), SIMDE_FLOAT16_VALUE(3678.000000), + SIMDE_FLOAT16_VALUE(-13288.000000), + SIMDE_FLOAT16_VALUE(-13520.000000)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), + SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), + SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), + SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, + {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), + SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), + SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), + SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, + {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), + SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), + SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), + SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-7536.000000), SIMDE_FLOAT16_VALUE(-6752.000000), + SIMDE_FLOAT16_VALUE(11696.000000), SIMDE_FLOAT16_VALUE(10864.000000), + SIMDE_FLOAT16_VALUE(-734.500000), SIMDE_FLOAT16_VALUE(599.500000), + SIMDE_FLOAT16_VALUE(-12600.000000), + SIMDE_FLOAT16_VALUE(-12016.000000)}}, + 
{{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), + SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), + SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), + SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, + {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), + SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), + SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, + {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), + SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), + SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), + SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(-21712.000000), SIMDE_FLOAT16_VALUE(-21360.000000), + SIMDE_FLOAT16_VALUE(8288.000000), SIMDE_FLOAT16_VALUE(8456.000000), + SIMDE_FLOAT16_VALUE(-8084.000000), SIMDE_FLOAT16_VALUE(-7528.000000), + SIMDE_FLOAT16_VALUE(19152.000000), SIMDE_FLOAT16_VALUE(18944.000000)}}, + {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), + SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), + SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, + {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), + SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), + SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), + SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, + {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), + SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), + SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), + SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(-13672.000000), SIMDE_FLOAT16_VALUE(-13552.000000), + SIMDE_FLOAT16_VALUE(-18768.000000), SIMDE_FLOAT16_VALUE(-19184.000000), + SIMDE_FLOAT16_VALUE(39072.000000), 
SIMDE_FLOAT16_VALUE(38272.000000), + SIMDE_FLOAT16_VALUE(-23568.000000), + SIMDE_FLOAT16_VALUE(-22672.000000)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - simde_float16x8_t r; + simde_float16x8_t r; // = simde_vcmlaq_laneq_f16(r_, a, b, test_vec[i].lane); // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); - SIMDE_CONSTIFY_4_(simde_vcmlaq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_4_( + simde_vcmlaq_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); // write_f16x8(r); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -654,8 +885,7 @@ test_simde_vcmlaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -664,69 +894,103 @@ 
test_simde_vcmlaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, - { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, - { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(431843.781250), SIMDE_FLOAT32_C(431658.250000), SIMDE_FLOAT32_C(239604.218750), SIMDE_FLOAT32_C(239020.156250) } }, - { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, - { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, - { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-185751.734375), SIMDE_FLOAT32_C(-186591.140625), SIMDE_FLOAT32_C(-1273.252075), SIMDE_FLOAT32_C(-1780.152100) } }, - { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, - { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, - { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-242415.500000), SIMDE_FLOAT32_C(-243155.093750), SIMDE_FLOAT32_C(189533.046875), SIMDE_FLOAT32_C(189217.609375) } }, - { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, - { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, - { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-485871.250000), SIMDE_FLOAT32_C(-487381.343750), 
SIMDE_FLOAT32_C(-588011.437500), SIMDE_FLOAT32_C(-587360.687500) } }, - { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, - { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, - { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-326445.437500), SIMDE_FLOAT32_C(-326905.343750), SIMDE_FLOAT32_C(163765.484375), SIMDE_FLOAT32_C(163522.125000) } }, - { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, - { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, - { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(711101.312500), SIMDE_FLOAT32_C(711655.625000), SIMDE_FLOAT32_C(487798.281250), SIMDE_FLOAT32_C(487838.125000) } }, - { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, - { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, - { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(792643.500000), SIMDE_FLOAT32_C(791384.875000), SIMDE_FLOAT32_C(-192679.765625), SIMDE_FLOAT32_C(-192855.687500) } }, - { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, - { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, - { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-153862.109375), SIMDE_FLOAT32_C(-152944.718750), SIMDE_FLOAT32_C(-160066.187500), 
SIMDE_FLOAT32_C(-158853.750000) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), + SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, + {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), + SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, + {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), + SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(431843.781250), SIMDE_FLOAT32_C(431658.250000), + SIMDE_FLOAT32_C(239604.218750), SIMDE_FLOAT32_C(239020.156250)}}, + {{SIMDE_FLOAT32_C(-57.67), SIMDE_FLOAT32_C(-897.07), + SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, + {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), + SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, + {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), + SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-185751.734375), SIMDE_FLOAT32_C(-186591.140625), + SIMDE_FLOAT32_C(-1273.252075), SIMDE_FLOAT32_C(-1780.152100)}}, + {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), + SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, + {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), + SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, + {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), + SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-242415.500000), SIMDE_FLOAT32_C(-243155.093750), + SIMDE_FLOAT32_C(189533.046875), SIMDE_FLOAT32_C(189217.609375)}}, + {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), + SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, + {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), + SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, + {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), + SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-485871.250000), SIMDE_FLOAT32_C(-487381.343750), + SIMDE_FLOAT32_C(-588011.437500), 
SIMDE_FLOAT32_C(-587360.687500)}}, + {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), + SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, + {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), + SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, + {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), + SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-326445.437500), SIMDE_FLOAT32_C(-326905.343750), + SIMDE_FLOAT32_C(163765.484375), SIMDE_FLOAT32_C(163522.125000)}}, + {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), + SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, + {SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), + SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, + {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), + SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, + INT32_C(1), + {SIMDE_FLOAT32_C(711101.312500), SIMDE_FLOAT32_C(711655.625000), + SIMDE_FLOAT32_C(487798.281250), SIMDE_FLOAT32_C(487838.125000)}}, + {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), + SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, + {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), + SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, + {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), + SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(792643.500000), SIMDE_FLOAT32_C(791384.875000), + SIMDE_FLOAT32_C(-192679.765625), SIMDE_FLOAT32_C(-192855.687500)}}, + {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), + SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, + {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), + SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, + {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), + SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-153862.109375), SIMDE_FLOAT32_C(-152944.718750), + SIMDE_FLOAT32_C(-160066.187500), SIMDE_FLOAT32_C(-158853.750000)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmlaq_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); diff --git a/test/arm/neon/cmla_rot180_lane.c b/test/arm/neon/cmla_rot180_lane.c index 91f99ced2..1553af309 100644 --- a/test/arm/neon/cmla_rot180_lane.c +++ b/test/arm/neon/cmla_rot180_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_rot180_lane -#include "test-neon.h" #include "../../../simde/arm/neon/cmla_rot180_lane.h" + #include "../../../simde/arm/neon/dup_n.h" +#include "test-neon.h" -static int -test_simde_vcmla_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { simde_float16_t r_[4]; @@ -14,73 +14,107 @@ test_simde_vcmla_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, - { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 7624.00), SIMDE_FLOAT16_VALUE( 7564.00), SIMDE_FLOAT16_VALUE( 9160.00), SIMDE_FLOAT16_VALUE( 9224.00) } }, - { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-17248.00), SIMDE_FLOAT16_VALUE(-17872.00), SIMDE_FLOAT16_VALUE( 8064.00), SIMDE_FLOAT16_VALUE( 9344.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 8448.00), SIMDE_FLOAT16_VALUE( 8232.00), SIMDE_FLOAT16_VALUE( -7296.00), SIMDE_FLOAT16_VALUE( -8968.00) } }, - { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, - { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), 
SIMDE_FLOAT16_VALUE( -75.25) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 37728.00), SIMDE_FLOAT16_VALUE( 37440.00), SIMDE_FLOAT16_VALUE(-23856.00), SIMDE_FLOAT16_VALUE(-23904.00) } }, - { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, - { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, - { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 15600.00), SIMDE_FLOAT16_VALUE( 15432.00), SIMDE_FLOAT16_VALUE( 22880.00), SIMDE_FLOAT16_VALUE( 22992.00) } }, - { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, - { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, - { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -305.50), SIMDE_FLOAT16_VALUE( -349.50), SIMDE_FLOAT16_VALUE(-48992.00), SIMDE_FLOAT16_VALUE(-48672.00) } }, - { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, - { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 35936.00), SIMDE_FLOAT16_VALUE( 35488.00), SIMDE_FLOAT16_VALUE( 10008.00), SIMDE_FLOAT16_VALUE( 9968.00) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), 
SIMDE_FLOAT16_VALUE( 579.00) }, - { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, - { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 16880.00), SIMDE_FLOAT16_VALUE( 17040.00), SIMDE_FLOAT16_VALUE( 14712.00), SIMDE_FLOAT16_VALUE( 14832.00) } } - + {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), + SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, + {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), + SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(7624.00), SIMDE_FLOAT16_VALUE(7564.00), + SIMDE_FLOAT16_VALUE(9160.00), SIMDE_FLOAT16_VALUE(9224.00)}}, + {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-14.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(61.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-17248.00), SIMDE_FLOAT16_VALUE(-17872.00), + SIMDE_FLOAT16_VALUE(8064.00), SIMDE_FLOAT16_VALUE(9344.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-61.00), SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(8448.00), SIMDE_FLOAT16_VALUE(8232.00), + SIMDE_FLOAT16_VALUE(-7296.00), SIMDE_FLOAT16_VALUE(-8968.00)}}, + 
{{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), + SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, + {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), + SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), + SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(37728.00), SIMDE_FLOAT16_VALUE(37440.00), + SIMDE_FLOAT16_VALUE(-23856.00), SIMDE_FLOAT16_VALUE(-23904.00)}}, + {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), + SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, + {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), + SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, + {SIMDE_FLOAT16_VALUE(-80.50), SIMDE_FLOAT16_VALUE(375.50), + SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(15600.00), SIMDE_FLOAT16_VALUE(15432.00), + SIMDE_FLOAT16_VALUE(22880.00), SIMDE_FLOAT16_VALUE(22992.00)}}, + {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, + {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, + {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), + SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-305.50), SIMDE_FLOAT16_VALUE(-349.50), + SIMDE_FLOAT16_VALUE(-48992.00), SIMDE_FLOAT16_VALUE(-48672.00)}}, + {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, + {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(35936.00), SIMDE_FLOAT16_VALUE(35488.00), + 
SIMDE_FLOAT16_VALUE(10008.00), SIMDE_FLOAT16_VALUE(9968.00)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, + {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, + {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), + SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(16880.00), SIMDE_FLOAT16_VALUE(17040.00), + SIMDE_FLOAT16_VALUE(14712.00), SIMDE_FLOAT16_VALUE(14832.00)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - - // simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + + // simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x4(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = 
simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -91,8 +125,7 @@ test_simde_vcmla_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -101,71 +134,69 @@ test_simde_vcmla_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, - { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, - { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-554711.687500), SIMDE_FLOAT32_C(-554377.000000) } }, - { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, - { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, - { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(5756.336914), SIMDE_FLOAT32_C(7296.427246) } }, - { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, - { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, - { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-266318.062500), SIMDE_FLOAT32_C(-267662.781250) } }, - { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, - { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, - { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(170191.187500), SIMDE_FLOAT32_C(168918.281250) } }, - { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, - { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, - { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, - INT32_C( 0), - { 
SIMDE_FLOAT32_C(-22939.761719), SIMDE_FLOAT32_C(-23948.582031) } }, - { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, - { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, - { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-41678.914062), SIMDE_FLOAT32_C(-40308.664062) } }, - { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, - { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, - { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(345016.781250), SIMDE_FLOAT32_C(345564.468750) } }, - { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, - { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, - { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(119264.132812), SIMDE_FLOAT32_C(120072.914062) } } - }; + {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, + {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, + {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-554711.687500), SIMDE_FLOAT32_C(-554377.000000)}}, + {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, + {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, + {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, + INT32_C(0), + {SIMDE_FLOAT32_C(5756.336914), SIMDE_FLOAT32_C(7296.427246)}}, + {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, + {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, + {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-266318.062500), SIMDE_FLOAT32_C(-267662.781250)}}, + {{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, + {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, + {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, + INT32_C(0), + {SIMDE_FLOAT32_C(170191.187500), SIMDE_FLOAT32_C(168918.281250)}}, + {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, + {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, + {SIMDE_FLOAT32_C(239.17), 
SIMDE_FLOAT32_C(469.68)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-22939.761719), SIMDE_FLOAT32_C(-23948.582031)}}, + {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, + {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, + {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-41678.914062), SIMDE_FLOAT32_C(-40308.664062)}}, + {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, + {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, + {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, + INT32_C(0), + {SIMDE_FLOAT32_C(345016.781250), SIMDE_FLOAT32_C(345564.468750)}}, + {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, + {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, + {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, + INT32_C(0), + {SIMDE_FLOAT32_C(119264.132812), SIMDE_FLOAT32_C(120072.914062)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); // simde_float32x2_t r; - // SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + // SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f32, r, (HEDLEY_UNREACHABLE(), + // simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); 
simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -176,9 +207,7 @@ test_simde_vcmla_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } - -static int -test_simde_vcmla_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ -187,80 +216,122 @@ test_simde_vcmla_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, - { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), - SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 8896.00), SIMDE_FLOAT16_VALUE( 8856.00), SIMDE_FLOAT16_VALUE( -5340.00), SIMDE_FLOAT16_VALUE( -4416.00) } }, - { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), - SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) 
}, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 498.75), SIMDE_FLOAT16_VALUE( 91.62), SIMDE_FLOAT16_VALUE( -3892.00), SIMDE_FLOAT16_VALUE( -3032.00) } }, - { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, - { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, - { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), - SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 4668.00), SIMDE_FLOAT16_VALUE( 4752.00), SIMDE_FLOAT16_VALUE( -9768.00), SIMDE_FLOAT16_VALUE(-10000.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), - SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-12368.00), SIMDE_FLOAT16_VALUE(-12592.00), SIMDE_FLOAT16_VALUE( 11288.00), SIMDE_FLOAT16_VALUE( 11216.00) } }, - { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, - { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), - SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, - INT32_C( 0), - { 
SIMDE_FLOAT16_VALUE(-20688.00), SIMDE_FLOAT16_VALUE(-22160.00), SIMDE_FLOAT16_VALUE( 45600.00), SIMDE_FLOAT16_VALUE( 45920.00) } }, - { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, - { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, - { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), - SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(-47488.00), SIMDE_FLOAT16_VALUE(-32832.00), SIMDE_FLOAT16_VALUE(-33184.00) } }, - { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, - { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, - { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), - SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(-19344.00), SIMDE_FLOAT16_VALUE(-28640.00), SIMDE_FLOAT16_VALUE(-26928.00) } }, - { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, - { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, - { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), - SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-55072.00), 
SIMDE_FLOAT16_VALUE(-55328.00), SIMDE_FLOAT16_VALUE( 17728.00), SIMDE_FLOAT16_VALUE( 16464.00) } } - }; + {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), + SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, + {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), + SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), + SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), + SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(8896.00), SIMDE_FLOAT16_VALUE(8856.00), + SIMDE_FLOAT16_VALUE(-5340.00), SIMDE_FLOAT16_VALUE(-4416.00)}}, + {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), + SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), + SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), + SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(498.75), SIMDE_FLOAT16_VALUE(91.62), + SIMDE_FLOAT16_VALUE(-3892.00), SIMDE_FLOAT16_VALUE(-3032.00)}}, + {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), + SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, + {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), + SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, + {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), + SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), + SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), + SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(4668.00), SIMDE_FLOAT16_VALUE(4752.00), + SIMDE_FLOAT16_VALUE(-9768.00), 
SIMDE_FLOAT16_VALUE(-10000.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-44.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-66.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(85.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), + SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-12368.00), SIMDE_FLOAT16_VALUE(-12592.00), + SIMDE_FLOAT16_VALUE(11288.00), SIMDE_FLOAT16_VALUE(11216.00)}}, + {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), + SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, + {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), + SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), + SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), + SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-20688.00), SIMDE_FLOAT16_VALUE(-22160.00), + SIMDE_FLOAT16_VALUE(45600.00), SIMDE_FLOAT16_VALUE(45920.00)}}, + {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), + SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, + {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), + SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, + {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), + SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), + SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), + SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(-47488.00), + SIMDE_FLOAT16_VALUE(-32832.00), SIMDE_FLOAT16_VALUE(-33184.00)}}, + {{SIMDE_FLOAT16_VALUE(83.19), 
SIMDE_FLOAT16_VALUE(418.00), + SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, + {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), + SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, + {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), + SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), + SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), + SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(-19344.00), + SIMDE_FLOAT16_VALUE(-28640.00), SIMDE_FLOAT16_VALUE(-26928.00)}}, + {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), + SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, + {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), + SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, + {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), + SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), + SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-55072.00), SIMDE_FLOAT16_VALUE(-55328.00), + SIMDE_FLOAT16_VALUE(17728.00), SIMDE_FLOAT16_VALUE(16464.00)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - - // simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + + // simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x4(2, r, + // SIMDE_TEST_VEC_POS_LAST); simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot180_laneq_f16, r, (HEDLEY_UNREACHABLE(), 
simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -271,8 +342,7 @@ test_simde_vcmla_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -281,71 +351,81 @@ test_simde_vcmla_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, - { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, - { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(44024.207031), SIMDE_FLOAT32_C(44576.335938) } }, - { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, - { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, - { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, - INT32_C( 1), - 
{ SIMDE_FLOAT32_C(6150.492188), SIMDE_FLOAT32_C(5876.352051) } }, - { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, - { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, - { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-119101.335938), SIMDE_FLOAT32_C(-118439.445312) } }, - { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, - { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, - { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(20765.861328), SIMDE_FLOAT32_C(20299.740234) } }, - { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, - { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, - { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(76234.859375), SIMDE_FLOAT32_C(75961.859375) } }, - { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, - { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, - { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(47759.132812), SIMDE_FLOAT32_C(47776.171875) } }, - { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, - { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, - { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(145529.937500), SIMDE_FLOAT32_C(145602.359375) } }, - { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, - { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, - { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-8361.368164), SIMDE_FLOAT32_C(-8186.958496) } } - }; + {{SIMDE_FLOAT32_C(-470.26), 
SIMDE_FLOAT32_C(81.87)}, + {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, + {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), + SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, + INT32_C(0), + {SIMDE_FLOAT32_C(44024.207031), SIMDE_FLOAT32_C(44576.335938)}}, + {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, + {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, + {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), + SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, + INT32_C(1), + {SIMDE_FLOAT32_C(6150.492188), SIMDE_FLOAT32_C(5876.352051)}}, + {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, + {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, + {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), + SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-119101.335938), SIMDE_FLOAT32_C(-118439.445312)}}, + {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, + {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, + {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), + SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, + INT32_C(1), + {SIMDE_FLOAT32_C(20765.861328), SIMDE_FLOAT32_C(20299.740234)}}, + {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, + {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, + {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), + SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, + INT32_C(0), + {SIMDE_FLOAT32_C(76234.859375), SIMDE_FLOAT32_C(75961.859375)}}, + {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, + {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, + {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), + SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, + INT32_C(1), + {SIMDE_FLOAT32_C(47759.132812), SIMDE_FLOAT32_C(47776.171875)}}, + {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, + {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, + {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), + SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, + INT32_C(0), + 
{SIMDE_FLOAT32_C(145529.937500), SIMDE_FLOAT32_C(145602.359375)}}, + {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, + {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, + {SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), + SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-8361.368164), SIMDE_FLOAT32_C(-8186.958496)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot180_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + // simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f32x2(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -356,8 +436,7 @@ test_simde_vcmla_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -366,97 +445,157 @@ test_simde_vcmlaq_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - - { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), - SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, - { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), - SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 44096.00), SIMDE_FLOAT16_VALUE( 44064.00), SIMDE_FLOAT16_VALUE(-30272.00), SIMDE_FLOAT16_VALUE(-30032.00), - SIMDE_FLOAT16_VALUE( 10048.00), SIMDE_FLOAT16_VALUE( 10600.00), SIMDE_FLOAT16_VALUE(-27472.00), SIMDE_FLOAT16_VALUE(-26736.00) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), - SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, - { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), - SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, - { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), 
SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -5168.00), SIMDE_FLOAT16_VALUE( -5008.00), SIMDE_FLOAT16_VALUE( 5108.00), SIMDE_FLOAT16_VALUE( 5228.00), - SIMDE_FLOAT16_VALUE(-32288.00), SIMDE_FLOAT16_VALUE(-32480.00), SIMDE_FLOAT16_VALUE(-44000.00), SIMDE_FLOAT16_VALUE(-43040.00) } }, - { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), - SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, - { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), - SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-18432.00), SIMDE_FLOAT16_VALUE(-17616.00), SIMDE_FLOAT16_VALUE(-19232.00), SIMDE_FLOAT16_VALUE(-19712.00), - SIMDE_FLOAT16_VALUE( 19552.00), SIMDE_FLOAT16_VALUE( 19392.00), SIMDE_FLOAT16_VALUE( 23232.00), SIMDE_FLOAT16_VALUE( 24848.00) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), - SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, - { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), - SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, - { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-22752.00), SIMDE_FLOAT16_VALUE(-22192.00), 
SIMDE_FLOAT16_VALUE(-24896.00), SIMDE_FLOAT16_VALUE(-23488.00), - SIMDE_FLOAT16_VALUE( 10176.00), SIMDE_FLOAT16_VALUE( 10136.00), SIMDE_FLOAT16_VALUE( -4368.00), SIMDE_FLOAT16_VALUE( -4384.00) } }, - { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), - SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, - { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), - SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, - { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 6048.00), SIMDE_FLOAT16_VALUE( 6248.00), SIMDE_FLOAT16_VALUE(-10736.00), SIMDE_FLOAT16_VALUE(-10136.00), - SIMDE_FLOAT16_VALUE( -6560.00), SIMDE_FLOAT16_VALUE( -7284.00), SIMDE_FLOAT16_VALUE( -6192.00), SIMDE_FLOAT16_VALUE( -7844.00) } }, - { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), - SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, - { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), - SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, - { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -1649.00), SIMDE_FLOAT16_VALUE( -964.50), SIMDE_FLOAT16_VALUE( -7952.00), SIMDE_FLOAT16_VALUE( -7544.00), - SIMDE_FLOAT16_VALUE( 2210.00), SIMDE_FLOAT16_VALUE( 2376.00), SIMDE_FLOAT16_VALUE( 1928.00), 
SIMDE_FLOAT16_VALUE( 1858.00) } }, - { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), - SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, - { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), - SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, - { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 25584.00), SIMDE_FLOAT16_VALUE( 24592.00), SIMDE_FLOAT16_VALUE( 37472.00), SIMDE_FLOAT16_VALUE( 36160.00), - SIMDE_FLOAT16_VALUE( 7316.00), SIMDE_FLOAT16_VALUE( 6884.00), SIMDE_FLOAT16_VALUE(-33888.00), SIMDE_FLOAT16_VALUE(-35456.00) } }, - { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), - SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, - { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), - SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, - { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 12760.00), SIMDE_FLOAT16_VALUE( 13128.00), SIMDE_FLOAT16_VALUE( 13552.00), SIMDE_FLOAT16_VALUE( 14016.00), - SIMDE_FLOAT16_VALUE( 6796.00), SIMDE_FLOAT16_VALUE( 7348.00), SIMDE_FLOAT16_VALUE( 6552.00), SIMDE_FLOAT16_VALUE( 5488.00) } } - + + {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), + 
SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), + SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, + {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), + SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), + SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), + SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, + {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), + SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(44096.00), SIMDE_FLOAT16_VALUE(44064.00), + SIMDE_FLOAT16_VALUE(-30272.00), SIMDE_FLOAT16_VALUE(-30032.00), + SIMDE_FLOAT16_VALUE(10048.00), SIMDE_FLOAT16_VALUE(10600.00), + SIMDE_FLOAT16_VALUE(-27472.00), SIMDE_FLOAT16_VALUE(-26736.00)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), + SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, + {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-40.00), + SIMDE_FLOAT16_VALUE(-52.00), SIMDE_FLOAT16_VALUE(75.88), + SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), + SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(43.50)}, + {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), + SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-5168.00), SIMDE_FLOAT16_VALUE(-5008.00), + SIMDE_FLOAT16_VALUE(5108.00), SIMDE_FLOAT16_VALUE(5228.00), + SIMDE_FLOAT16_VALUE(-32288.00), SIMDE_FLOAT16_VALUE(-32480.00), + SIMDE_FLOAT16_VALUE(-44000.00), SIMDE_FLOAT16_VALUE(-43040.00)}}, + {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), + SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), + SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), + SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, + {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), + SIMDE_FLOAT16_VALUE(578.50), 
SIMDE_FLOAT16_VALUE(430.50), + SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), + SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-18432.00), SIMDE_FLOAT16_VALUE(-17616.00), + SIMDE_FLOAT16_VALUE(-19232.00), SIMDE_FLOAT16_VALUE(-19712.00), + SIMDE_FLOAT16_VALUE(19552.00), SIMDE_FLOAT16_VALUE(19392.00), + SIMDE_FLOAT16_VALUE(23232.00), SIMDE_FLOAT16_VALUE(24848.00)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), + SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), + SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), + SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, + {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), + SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), + SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), + SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, + {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), + SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-22752.00), SIMDE_FLOAT16_VALUE(-22192.00), + SIMDE_FLOAT16_VALUE(-24896.00), SIMDE_FLOAT16_VALUE(-23488.00), + SIMDE_FLOAT16_VALUE(10176.00), SIMDE_FLOAT16_VALUE(10136.00), + SIMDE_FLOAT16_VALUE(-4368.00), SIMDE_FLOAT16_VALUE(-4384.00)}}, + {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), + SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), + SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), + SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, + {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), + SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), + SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), + SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, + {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), 
+ SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(6048.00), SIMDE_FLOAT16_VALUE(6248.00), + SIMDE_FLOAT16_VALUE(-10736.00), SIMDE_FLOAT16_VALUE(-10136.00), + SIMDE_FLOAT16_VALUE(-6560.00), SIMDE_FLOAT16_VALUE(-7284.00), + SIMDE_FLOAT16_VALUE(-6192.00), SIMDE_FLOAT16_VALUE(-7844.00)}}, + {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), + SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), + SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), + SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, + {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), + SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), + SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), + SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, + {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), + SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-1649.00), SIMDE_FLOAT16_VALUE(-964.50), + SIMDE_FLOAT16_VALUE(-7952.00), SIMDE_FLOAT16_VALUE(-7544.00), + SIMDE_FLOAT16_VALUE(2210.00), SIMDE_FLOAT16_VALUE(2376.00), + SIMDE_FLOAT16_VALUE(1928.00), SIMDE_FLOAT16_VALUE(1858.00)}}, + {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), + SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), + SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), + SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, + {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), + SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, + {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), + SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(25584.00), SIMDE_FLOAT16_VALUE(24592.00), + SIMDE_FLOAT16_VALUE(37472.00), SIMDE_FLOAT16_VALUE(36160.00), + 
SIMDE_FLOAT16_VALUE(7316.00), SIMDE_FLOAT16_VALUE(6884.00), + SIMDE_FLOAT16_VALUE(-33888.00), SIMDE_FLOAT16_VALUE(-35456.00)}}, + {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), + SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), + SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), + SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, + {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), + SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), + SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), + SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, + {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), + SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(12760.00), SIMDE_FLOAT16_VALUE(13128.00), + SIMDE_FLOAT16_VALUE(13552.00), SIMDE_FLOAT16_VALUE(14016.00), + SIMDE_FLOAT16_VALUE(6796.00), SIMDE_FLOAT16_VALUE(7348.00), + SIMDE_FLOAT16_VALUE(6552.00), SIMDE_FLOAT16_VALUE(5488.00)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_rot180_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmlaq_rot180_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - // simde_float16x8_t r = simde_vcmlaq_rot180_lane_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); + // simde_float16x8_t 
r = simde_vcmlaq_rot180_lane_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -467,8 +606,7 @@ test_simde_vcmlaq_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -477,69 +615,91 @@ test_simde_vcmlaq_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, - { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, - { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(250351.875000), SIMDE_FLOAT32_C(250540.937500), SIMDE_FLOAT32_C(-641627.000000), SIMDE_FLOAT32_C(-641620.500000) } }, - { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, - { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, - { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, - INT32_C( 0), - { 
SIMDE_FLOAT32_C(78247.265625), SIMDE_FLOAT32_C(78436.890625), SIMDE_FLOAT32_C(107704.531250), SIMDE_FLOAT32_C(106755.757812) } }, - { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, - { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, - { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(255449.343750), SIMDE_FLOAT32_C(255087.765625), SIMDE_FLOAT32_C(-542620.625000), SIMDE_FLOAT32_C(-543163.250000) } }, - { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, - { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, - { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(64836.050781), SIMDE_FLOAT32_C(64934.960938), SIMDE_FLOAT32_C(-93544.031250), SIMDE_FLOAT32_C(-94298.843750) } }, - { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, - { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, - { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-15087.788086), SIMDE_FLOAT32_C(-16904.587891), SIMDE_FLOAT32_C(-8463.912109), SIMDE_FLOAT32_C(-8664.532227) } }, - { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, - { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, - { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-94916.125000), SIMDE_FLOAT32_C(-93724.765625), SIMDE_FLOAT32_C(241852.218750), SIMDE_FLOAT32_C(241671.671875) } }, - { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, - { 
SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, - { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-138462.234375), SIMDE_FLOAT32_C(-137421.078125), SIMDE_FLOAT32_C(218469.250000), SIMDE_FLOAT32_C(219506.703125) } }, - { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, - { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, - { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(137911.859375), SIMDE_FLOAT32_C(139323.281250), SIMDE_FLOAT32_C(32517.996094), SIMDE_FLOAT32_C(33383.957031) } } - }; + {{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), + SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, + {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), + SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, + {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(250351.875000), SIMDE_FLOAT32_C(250540.937500), + SIMDE_FLOAT32_C(-641627.000000), SIMDE_FLOAT32_C(-641620.500000)}}, + {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), + SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, + {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), + SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, + {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, + INT32_C(0), + {SIMDE_FLOAT32_C(78247.265625), SIMDE_FLOAT32_C(78436.890625), + SIMDE_FLOAT32_C(107704.531250), SIMDE_FLOAT32_C(106755.757812)}}, + {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), + SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, + {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), + SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, + {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, + INT32_C(0), + {SIMDE_FLOAT32_C(255449.343750), SIMDE_FLOAT32_C(255087.765625), + SIMDE_FLOAT32_C(-542620.625000), 
SIMDE_FLOAT32_C(-543163.250000)}}, + {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), + SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, + {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), + SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, + {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(64836.050781), SIMDE_FLOAT32_C(64934.960938), + SIMDE_FLOAT32_C(-93544.031250), SIMDE_FLOAT32_C(-94298.843750)}}, + {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), + SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, + {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), + SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, + {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-15087.788086), SIMDE_FLOAT32_C(-16904.587891), + SIMDE_FLOAT32_C(-8463.912109), SIMDE_FLOAT32_C(-8664.532227)}}, + {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), + SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, + {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), + SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, + {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-94916.125000), SIMDE_FLOAT32_C(-93724.765625), + SIMDE_FLOAT32_C(241852.218750), SIMDE_FLOAT32_C(241671.671875)}}, + {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), + SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, + {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), + SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, + {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-138462.234375), SIMDE_FLOAT32_C(-137421.078125), + SIMDE_FLOAT32_C(218469.250000), SIMDE_FLOAT32_C(219506.703125)}}, + {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), + SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, + {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), + SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, + {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, + INT32_C(0), + 
{SIMDE_FLOAT32_C(137911.859375), SIMDE_FLOAT32_C(139323.281250), + SIMDE_FLOAT32_C(32517.996094), SIMDE_FLOAT32_C(33383.957031)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot180_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -550,8 +710,7 @@ test_simde_vcmlaq_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -560,104 +719,171 @@ test_simde_vcmlaq_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), - 
SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, - { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), - SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, - { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), - SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-58464.00), SIMDE_FLOAT16_VALUE(-57792.00), SIMDE_FLOAT16_VALUE( -3786.00), SIMDE_FLOAT16_VALUE( -4900.00), - SIMDE_FLOAT16_VALUE( -7016.00), SIMDE_FLOAT16_VALUE( -6296.00), SIMDE_FLOAT16_VALUE( 20384.00), SIMDE_FLOAT16_VALUE( 21440.00) } }, - { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), - SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, - { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), - SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, - { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), - SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 12328.00), SIMDE_FLOAT16_VALUE( 12344.00), SIMDE_FLOAT16_VALUE( 17360.00), SIMDE_FLOAT16_VALUE( 15872.00), - SIMDE_FLOAT16_VALUE( -6872.00), SIMDE_FLOAT16_VALUE( -6292.00), SIMDE_FLOAT16_VALUE(-39136.00), SIMDE_FLOAT16_VALUE(-39072.00) } }, - { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), 
SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), - SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, - { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), - SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, - { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), - SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE( 14768.00), SIMDE_FLOAT16_VALUE( 15392.00), SIMDE_FLOAT16_VALUE(-29696.00), SIMDE_FLOAT16_VALUE(-29968.00), - SIMDE_FLOAT16_VALUE( 25824.00), SIMDE_FLOAT16_VALUE( 26768.00), SIMDE_FLOAT16_VALUE( -6152.00), SIMDE_FLOAT16_VALUE( -7276.00) } }, - { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), - SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, - { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), - SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, - { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), - SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE(-31696.00), SIMDE_FLOAT16_VALUE(-31104.00), SIMDE_FLOAT16_VALUE( 25248.00), SIMDE_FLOAT16_VALUE( 25184.00), - SIMDE_FLOAT16_VALUE( 11056.00), SIMDE_FLOAT16_VALUE( 12648.00), SIMDE_FLOAT16_VALUE(-17920.00), SIMDE_FLOAT16_VALUE(-18672.00) 
} }, - { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), - SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, - { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), - SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, - { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), - SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( -1321.00), SIMDE_FLOAT16_VALUE( -2526.00), SIMDE_FLOAT16_VALUE(-17408.00), SIMDE_FLOAT16_VALUE(-18400.00), - SIMDE_FLOAT16_VALUE( -3076.00), SIMDE_FLOAT16_VALUE( -4304.00), SIMDE_FLOAT16_VALUE( 12480.00), SIMDE_FLOAT16_VALUE( 12248.00) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), - SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, - { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), - SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, - { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), - SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 6300.00), SIMDE_FLOAT16_VALUE( 7084.00), SIMDE_FLOAT16_VALUE(-11088.00), SIMDE_FLOAT16_VALUE(-11928.00), - SIMDE_FLOAT16_VALUE( -472.50), SIMDE_FLOAT16_VALUE( 
861.50), SIMDE_FLOAT16_VALUE( 12688.00), SIMDE_FLOAT16_VALUE( 13272.00) } }, - { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), - SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, - { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), - SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, - { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), - SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE( 20064.00), SIMDE_FLOAT16_VALUE( 20416.00), SIMDE_FLOAT16_VALUE( -9352.00), SIMDE_FLOAT16_VALUE( -9184.00), - SIMDE_FLOAT16_VALUE( 8368.00), SIMDE_FLOAT16_VALUE( 8928.00), SIMDE_FLOAT16_VALUE(-17744.00), SIMDE_FLOAT16_VALUE(-17952.00) } }, - { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), - SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, - { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), - SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, - { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), - SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE( 14104.00), SIMDE_FLOAT16_VALUE( 14224.00), SIMDE_FLOAT16_VALUE( 19424.00), SIMDE_FLOAT16_VALUE( 
19008.00), - SIMDE_FLOAT16_VALUE(-38688.00), SIMDE_FLOAT16_VALUE(-39520.00), SIMDE_FLOAT16_VALUE( 23344.00), SIMDE_FLOAT16_VALUE( 24224.00) } } - }; + {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), + SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), + SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), + SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, + {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), + SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), + SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), + SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, + {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), + SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), + SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), + SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-58464.00), SIMDE_FLOAT16_VALUE(-57792.00), + SIMDE_FLOAT16_VALUE(-3786.00), SIMDE_FLOAT16_VALUE(-4900.00), + SIMDE_FLOAT16_VALUE(-7016.00), SIMDE_FLOAT16_VALUE(-6296.00), + SIMDE_FLOAT16_VALUE(20384.00), SIMDE_FLOAT16_VALUE(21440.00)}}, + {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), + SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), + SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), + SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, + {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), + SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(79.00), SIMDE_FLOAT16_VALUE(84.00), + SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-96.00)}, + {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), + SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), + SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), + SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(12328.00), SIMDE_FLOAT16_VALUE(12344.00), + SIMDE_FLOAT16_VALUE(17360.00), 
SIMDE_FLOAT16_VALUE(15872.00), + SIMDE_FLOAT16_VALUE(-6872.00), SIMDE_FLOAT16_VALUE(-6292.00), + SIMDE_FLOAT16_VALUE(-39136.00), SIMDE_FLOAT16_VALUE(-39072.00)}}, + {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), + SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), + SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), + SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, + {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), + SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), + SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), + SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, + {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), + SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), + SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), + SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(14768.00), SIMDE_FLOAT16_VALUE(15392.00), + SIMDE_FLOAT16_VALUE(-29696.00), SIMDE_FLOAT16_VALUE(-29968.00), + SIMDE_FLOAT16_VALUE(25824.00), SIMDE_FLOAT16_VALUE(26768.00), + SIMDE_FLOAT16_VALUE(-6152.00), SIMDE_FLOAT16_VALUE(-7276.00)}}, + {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), + SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), + SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), + SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, + {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), + SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), + SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), + SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, + {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), + SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), + SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), + SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(-31696.00), SIMDE_FLOAT16_VALUE(-31104.00), + 
SIMDE_FLOAT16_VALUE(25248.00), SIMDE_FLOAT16_VALUE(25184.00), + SIMDE_FLOAT16_VALUE(11056.00), SIMDE_FLOAT16_VALUE(12648.00), + SIMDE_FLOAT16_VALUE(-17920.00), SIMDE_FLOAT16_VALUE(-18672.00)}}, + {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), + SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), + SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), + SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, + {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), + SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), + SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), + SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, + {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), + SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), + SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), + SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-1321.00), SIMDE_FLOAT16_VALUE(-2526.00), + SIMDE_FLOAT16_VALUE(-17408.00), SIMDE_FLOAT16_VALUE(-18400.00), + SIMDE_FLOAT16_VALUE(-3076.00), SIMDE_FLOAT16_VALUE(-4304.00), + SIMDE_FLOAT16_VALUE(12480.00), SIMDE_FLOAT16_VALUE(12248.00)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), + SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), + SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), + SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, + {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), + SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), + SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), + SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, + {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), + SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), + SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), + SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(6300.00), 
SIMDE_FLOAT16_VALUE(7084.00), + SIMDE_FLOAT16_VALUE(-11088.00), SIMDE_FLOAT16_VALUE(-11928.00), + SIMDE_FLOAT16_VALUE(-472.50), SIMDE_FLOAT16_VALUE(861.50), + SIMDE_FLOAT16_VALUE(12688.00), SIMDE_FLOAT16_VALUE(13272.00)}}, + {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), + SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), + SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), + SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, + {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), + SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), + SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, + {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), + SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), + SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), + SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(20064.00), SIMDE_FLOAT16_VALUE(20416.00), + SIMDE_FLOAT16_VALUE(-9352.00), SIMDE_FLOAT16_VALUE(-9184.00), + SIMDE_FLOAT16_VALUE(8368.00), SIMDE_FLOAT16_VALUE(8928.00), + SIMDE_FLOAT16_VALUE(-17744.00), SIMDE_FLOAT16_VALUE(-17952.00)}}, + {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), + SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), + SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, + {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), + SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), + SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), + SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, + {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), + SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), + SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), + SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, + INT32_C(3), + 
{SIMDE_FLOAT16_VALUE(14104.00), SIMDE_FLOAT16_VALUE(14224.00), + SIMDE_FLOAT16_VALUE(19424.00), SIMDE_FLOAT16_VALUE(19008.00), + SIMDE_FLOAT16_VALUE(-38688.00), SIMDE_FLOAT16_VALUE(-39520.00), + SIMDE_FLOAT16_VALUE(23344.00), SIMDE_FLOAT16_VALUE(24224.00)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - - // simde_float16x8_t r = simde_vcmlaq_rot180_laneq_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + + // simde_float16x8_t r = simde_vcmlaq_rot180_laneq_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, + // SIMDE_TEST_VEC_POS_LAST); simde_float16x8_t r; - SIMDE_CONSTIFY_4_(simde_vcmlaq_rot180_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + SIMDE_CONSTIFY_4_( + simde_vcmlaq_rot180_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -668,8 +894,7 @@ test_simde_vcmlaq_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -678,71 +903,106 @@ test_simde_vcmlaq_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, - { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, - { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-431133.437500), SIMDE_FLOAT32_C(-431318.968750), SIMDE_FLOAT32_C(-239370.468750), SIMDE_FLOAT32_C(-239954.531250) } }, - { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, - { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, - { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(185636.390625), SIMDE_FLOAT32_C(184797.000000), SIMDE_FLOAT32_C(1511.212036), SIMDE_FLOAT32_C(1004.312012) } }, - { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, - { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, - { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(241976.421875), SIMDE_FLOAT32_C(241236.812500), SIMDE_FLOAT32_C(-187645.218750), SIMDE_FLOAT32_C(-187960.656250) } }, - { { 
SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, - { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, - { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(487437.437500), SIMDE_FLOAT32_C(485927.312500), SIMDE_FLOAT32_C(586838.562500), SIMDE_FLOAT32_C(587489.312500) } }, - { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, - { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, - { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(328442.062500), SIMDE_FLOAT32_C(327982.156250), SIMDE_FLOAT32_C(-164147.734375), SIMDE_FLOAT32_C(-164391.093750) } }, - { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, - { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, - { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-710996.062500), SIMDE_FLOAT32_C(-710441.750000), SIMDE_FLOAT32_C(-489586.718750), SIMDE_FLOAT32_C(-489546.875000) } }, - { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, - { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, - { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-791530.062500), SIMDE_FLOAT32_C(-792788.687500), SIMDE_FLOAT32_C(191966.765625), SIMDE_FLOAT32_C(191790.843750) } }, - { { SIMDE_FLOAT32_C( -286.79), 
SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, - { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, - { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(153288.531250), SIMDE_FLOAT32_C(154205.937500), SIMDE_FLOAT32_C(158087.750000), SIMDE_FLOAT32_C(159300.171875) } } - }; + {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), + SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, + {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), + SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, + {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), + SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-431133.437500), SIMDE_FLOAT32_C(-431318.968750), + SIMDE_FLOAT32_C(-239370.468750), SIMDE_FLOAT32_C(-239954.531250)}}, + {{SIMDE_FLOAT32_C(-57.67), SIMDE_FLOAT32_C(-897.07), + SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, + {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), + SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, + {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), + SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, + INT32_C(1), + {SIMDE_FLOAT32_C(185636.390625), SIMDE_FLOAT32_C(184797.000000), + SIMDE_FLOAT32_C(1511.212036), SIMDE_FLOAT32_C(1004.312012)}}, + {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), + SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, + {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), + SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, + {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), + SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, + INT32_C(0), + {SIMDE_FLOAT32_C(241976.421875), SIMDE_FLOAT32_C(241236.812500), + SIMDE_FLOAT32_C(-187645.218750), SIMDE_FLOAT32_C(-187960.656250)}}, + {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), + SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, + 
{SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), + SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, + {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), + SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, + INT32_C(1), + {SIMDE_FLOAT32_C(487437.437500), SIMDE_FLOAT32_C(485927.312500), + SIMDE_FLOAT32_C(586838.562500), SIMDE_FLOAT32_C(587489.312500)}}, + {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), + SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, + {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), + SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, + {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), + SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, + INT32_C(0), + {SIMDE_FLOAT32_C(328442.062500), SIMDE_FLOAT32_C(327982.156250), + SIMDE_FLOAT32_C(-164147.734375), SIMDE_FLOAT32_C(-164391.093750)}}, + {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), + SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, + {SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), + SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, + {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), + SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-710996.062500), SIMDE_FLOAT32_C(-710441.750000), + SIMDE_FLOAT32_C(-489586.718750), SIMDE_FLOAT32_C(-489546.875000)}}, + {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), + SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, + {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), + SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, + {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), + SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-791530.062500), SIMDE_FLOAT32_C(-792788.687500), + SIMDE_FLOAT32_C(191966.765625), SIMDE_FLOAT32_C(191790.843750)}}, + {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), + SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, + {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), + SIMDE_FLOAT32_C(841.41), 
SIMDE_FLOAT32_C(735.52)}, + {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), + SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, + INT32_C(1), + {SIMDE_FLOAT32_C(153288.531250), SIMDE_FLOAT32_C(154205.937500), + SIMDE_FLOAT32_C(158087.750000), SIMDE_FLOAT32_C(159300.171875)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_rot180_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + SIMDE_CONSTIFY_2_( + simde_vcmlaq_rot180_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); - // simde_float32x4_t r = simde_vcmlaq_rot180_laneq_f32(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + // simde_float32x4_t r = simde_vcmlaq_rot180_laneq_f32(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f32x4(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_rot180_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); diff --git a/test/arm/neon/cmla_rot270_lane.c b/test/arm/neon/cmla_rot270_lane.c index deb0056bc..9242fff16 100644 --- a/test/arm/neon/cmla_rot270_lane.c +++ b/test/arm/neon/cmla_rot270_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_rot270_lane -#include "test-neon.h" #include "../../../simde/arm/neon/cmla_rot270_lane.h" + #include "../../../simde/arm/neon/dup_n.h" +#include "test-neon.h" -static int -test_simde_vcmla_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ -14,71 +14,103 @@ test_simde_vcmla_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, - { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 389.25), SIMDE_FLOAT16_VALUE( -547.50), SIMDE_FLOAT16_VALUE( -1965.00), SIMDE_FLOAT16_VALUE( 771.00) } }, - { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -133.12), SIMDE_FLOAT16_VALUE( 966.00), SIMDE_FLOAT16_VALUE( 43968.00), SIMDE_FLOAT16_VALUE(-43456.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), 
SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 58720.00), SIMDE_FLOAT16_VALUE(-59360.00), SIMDE_FLOAT16_VALUE(-27488.00), SIMDE_FLOAT16_VALUE( 27264.00) } }, - { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, - { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 30992.00), SIMDE_FLOAT16_VALUE(-31104.00), SIMDE_FLOAT16_VALUE(-18160.00), SIMDE_FLOAT16_VALUE( 17840.00) } }, - { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, - { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, - { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 16024.00), SIMDE_FLOAT16_VALUE(-15864.00), SIMDE_FLOAT16_VALUE( 42528.00), SIMDE_FLOAT16_VALUE(-42528.00) } }, - { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, - { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, - { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -1876.00), SIMDE_FLOAT16_VALUE( 1769.00), 
SIMDE_FLOAT16_VALUE( 46432.00), SIMDE_FLOAT16_VALUE(-46208.00) } }, - { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, - { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 15128.00), SIMDE_FLOAT16_VALUE(-15168.00), SIMDE_FLOAT16_VALUE( 1695.00), SIMDE_FLOAT16_VALUE( -1617.00) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, - { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, - { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-14904.00), SIMDE_FLOAT16_VALUE( 13320.00), SIMDE_FLOAT16_VALUE( 12312.00), SIMDE_FLOAT16_VALUE(-11272.00) } } - + {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), + SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, + {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), + SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(389.25), SIMDE_FLOAT16_VALUE(-547.50), + SIMDE_FLOAT16_VALUE(-1965.00), SIMDE_FLOAT16_VALUE(771.00)}}, + {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-14.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(61.38), + SIMDE_FLOAT16_VALUE(115.50), 
SIMDE_FLOAT16_VALUE(70.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-133.12), SIMDE_FLOAT16_VALUE(966.00), + SIMDE_FLOAT16_VALUE(43968.00), SIMDE_FLOAT16_VALUE(-43456.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-61.00), SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(58720.00), SIMDE_FLOAT16_VALUE(-59360.00), + SIMDE_FLOAT16_VALUE(-27488.00), SIMDE_FLOAT16_VALUE(27264.00)}}, + {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), + SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, + {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), + SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), + SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(30992.00), SIMDE_FLOAT16_VALUE(-31104.00), + SIMDE_FLOAT16_VALUE(-18160.00), SIMDE_FLOAT16_VALUE(17840.00)}}, + {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), + SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, + {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), + SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, + {SIMDE_FLOAT16_VALUE(-80.50), SIMDE_FLOAT16_VALUE(375.50), + SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(16024.00), SIMDE_FLOAT16_VALUE(-15864.00), + SIMDE_FLOAT16_VALUE(42528.00), SIMDE_FLOAT16_VALUE(-42528.00)}}, + {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, + {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, + {SIMDE_FLOAT16_VALUE(-31.45), 
SIMDE_FLOAT16_VALUE(-180.50), + SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-1876.00), SIMDE_FLOAT16_VALUE(1769.00), + SIMDE_FLOAT16_VALUE(46432.00), SIMDE_FLOAT16_VALUE(-46208.00)}}, + {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, + {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(15128.00), SIMDE_FLOAT16_VALUE(-15168.00), + SIMDE_FLOAT16_VALUE(1695.00), SIMDE_FLOAT16_VALUE(-1617.00)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, + {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, + {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), + SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-14904.00), SIMDE_FLOAT16_VALUE(13320.00), + SIMDE_FLOAT16_VALUE(12312.00), SIMDE_FLOAT16_VALUE(-11272.00)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot270_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - } - + return 0; 
#else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot270_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -89,8 +121,7 @@ test_simde_vcmla_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -99,49 +130,48 @@ test_simde_vcmla_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, - { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, - { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-526088.812500), SIMDE_FLOAT32_C(526589.937500) } }, - { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, - { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, - { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-50512.480469), SIMDE_FLOAT32_C(50272.230469) } }, - { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, - { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, - { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-448612.062500), SIMDE_FLOAT32_C(448311.968750) } }, - { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, - { SIMDE_FLOAT32_C( -740.26), 
SIMDE_FLOAT32_C( 245.04) }, - { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(56657.046875), SIMDE_FLOAT32_C(-56971.597656) } }, - { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, - { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, - { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-12129.276367), SIMDE_FLOAT32_C(11783.416992) } }, - { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, - { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, - { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(31953.617188), SIMDE_FLOAT32_C(-31670.166016) } }, - { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, - { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, - { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(311662.375000), SIMDE_FLOAT32_C(-312512.375000) } }, - { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, - { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, - { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(180180.687500), SIMDE_FLOAT32_C(-180607.796875) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, + {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, + {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-526088.812500), SIMDE_FLOAT32_C(526589.937500)}}, + {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, + {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, + {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-50512.480469), SIMDE_FLOAT32_C(50272.230469)}}, + {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, + {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, + {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, + INT32_C(0), + 
{SIMDE_FLOAT32_C(-448612.062500), SIMDE_FLOAT32_C(448311.968750)}}, + {{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, + {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, + {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, + INT32_C(0), + {SIMDE_FLOAT32_C(56657.046875), SIMDE_FLOAT32_C(-56971.597656)}}, + {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, + {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, + {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-12129.276367), SIMDE_FLOAT32_C(11783.416992)}}, + {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, + {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, + {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, + INT32_C(0), + {SIMDE_FLOAT32_C(31953.617188), SIMDE_FLOAT32_C(-31670.166016)}}, + {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, + {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, + {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, + INT32_C(0), + {SIMDE_FLOAT32_C(311662.375000), SIMDE_FLOAT32_C(-312512.375000)}}, + {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, + {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, + {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, + INT32_C(0), + {SIMDE_FLOAT32_C(180180.687500), SIMDE_FLOAT32_C(-180607.796875)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); @@ -150,18 +180,17 @@ test_simde_vcmla_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ 
= simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot270_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -172,9 +201,7 @@ test_simde_vcmla_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } - -static int -test_simde_vcmla_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ -183,78 +210,118 @@ test_simde_vcmla_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, - { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), - SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 12392.00), SIMDE_FLOAT16_VALUE(-10800.00), SIMDE_FLOAT16_VALUE(-23712.00), SIMDE_FLOAT16_VALUE( 23888.00) } }, - { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), - SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), 
SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 5536.00), SIMDE_FLOAT16_VALUE( -5360.00), SIMDE_FLOAT16_VALUE( 2150.00), SIMDE_FLOAT16_VALUE( -1565.00) } }, - { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, - { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, - { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), - SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( -5512.00), SIMDE_FLOAT16_VALUE( 4924.00), SIMDE_FLOAT16_VALUE( 7552.00), SIMDE_FLOAT16_VALUE( -7336.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), - SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -5864.00), SIMDE_FLOAT16_VALUE( 5220.00), SIMDE_FLOAT16_VALUE( 39616.00), SIMDE_FLOAT16_VALUE(-39648.00) } }, - { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, - { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), - SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( 
-209.25), SIMDE_FLOAT16_VALUE( -856.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( -8328.00), SIMDE_FLOAT16_VALUE( 7940.00), SIMDE_FLOAT16_VALUE( 5928.00), SIMDE_FLOAT16_VALUE( -5572.00) } }, - { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, - { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, - { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), - SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-15368.00), SIMDE_FLOAT16_VALUE( 16144.00), SIMDE_FLOAT16_VALUE(-27904.00), SIMDE_FLOAT16_VALUE( 27440.00) } }, - { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, - { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, - { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), - SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-25872.00), SIMDE_FLOAT16_VALUE( 26368.00), SIMDE_FLOAT16_VALUE(-21152.00), SIMDE_FLOAT16_VALUE( 21104.00) } }, - { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, - { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, - { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), - SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 
827.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-17792.00), SIMDE_FLOAT16_VALUE( 18816.00), SIMDE_FLOAT16_VALUE( 9000.00), SIMDE_FLOAT16_VALUE( -9312.00) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), + SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, + {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), + SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), + SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), + SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(12392.00), SIMDE_FLOAT16_VALUE(-10800.00), + SIMDE_FLOAT16_VALUE(-23712.00), SIMDE_FLOAT16_VALUE(23888.00)}}, + {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), + SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), + SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), + SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(5536.00), SIMDE_FLOAT16_VALUE(-5360.00), + SIMDE_FLOAT16_VALUE(2150.00), SIMDE_FLOAT16_VALUE(-1565.00)}}, + {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), + SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, + {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), + SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, + {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), + SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), + SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), + SIMDE_FLOAT16_VALUE(-674.50), 
SIMDE_FLOAT16_VALUE(-247.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-5512.00), SIMDE_FLOAT16_VALUE(4924.00), + SIMDE_FLOAT16_VALUE(7552.00), SIMDE_FLOAT16_VALUE(-7336.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-44.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-66.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(85.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), + SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-5864.00), SIMDE_FLOAT16_VALUE(5220.00), + SIMDE_FLOAT16_VALUE(39616.00), SIMDE_FLOAT16_VALUE(-39648.00)}}, + {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), + SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, + {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), + SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), + SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), + SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-8328.00), SIMDE_FLOAT16_VALUE(7940.00), + SIMDE_FLOAT16_VALUE(5928.00), SIMDE_FLOAT16_VALUE(-5572.00)}}, + {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), + SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, + {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), + SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, + {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), + SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), + SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), + SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-15368.00), 
SIMDE_FLOAT16_VALUE(16144.00), + SIMDE_FLOAT16_VALUE(-27904.00), SIMDE_FLOAT16_VALUE(27440.00)}}, + {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), + SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, + {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), + SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, + {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), + SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), + SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), + SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-25872.00), SIMDE_FLOAT16_VALUE(26368.00), + SIMDE_FLOAT16_VALUE(-21152.00), SIMDE_FLOAT16_VALUE(21104.00)}}, + {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), + SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, + {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), + SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, + {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), + SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), + SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-17792.00), SIMDE_FLOAT16_VALUE(18816.00), + SIMDE_FLOAT16_VALUE(9000.00), SIMDE_FLOAT16_VALUE(-9312.00)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - + simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot270_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, 
simde_vld1_f16(test_vec[i].r), 1); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot270_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -265,8 +332,7 @@ test_simde_vcmla_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -275,70 +341,78 @@ test_simde_vcmla_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, - { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, - { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(610637.625000), SIMDE_FLOAT32_C(-611026.000000) } }, - { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, - { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, - { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-6366.252441), SIMDE_FLOAT32_C(5428.512207) } }, - { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, - { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, - { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), 
SIMDE_FLOAT32_C( 216.10) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-36916.816406), SIMDE_FLOAT32_C(37517.988281) } }, - { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, - { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, - { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-235786.640625), SIMDE_FLOAT32_C(237312.234375) } }, - { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, - { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, - { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(105068.734375), SIMDE_FLOAT32_C(-104790.312500) } }, - { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, - { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, - { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(90468.750000), SIMDE_FLOAT32_C(-89659.671875) } }, - { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, - { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, - { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-417438.093750), SIMDE_FLOAT32_C(417206.593750) } }, - { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, - { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, - { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-33583.660156), SIMDE_FLOAT32_C(33966.328125) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, + {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, + {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), + SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, 
+ INT32_C(0), + {SIMDE_FLOAT32_C(610637.625000), SIMDE_FLOAT32_C(-611026.000000)}}, + {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, + {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, + {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), + SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-6366.252441), SIMDE_FLOAT32_C(5428.512207)}}, + {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, + {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, + {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), + SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-36916.816406), SIMDE_FLOAT32_C(37517.988281)}}, + {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, + {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, + {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), + SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-235786.640625), SIMDE_FLOAT32_C(237312.234375)}}, + {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, + {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, + {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), + SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, + INT32_C(0), + {SIMDE_FLOAT32_C(105068.734375), SIMDE_FLOAT32_C(-104790.312500)}}, + {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, + {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, + {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), + SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, + INT32_C(1), + {SIMDE_FLOAT32_C(90468.750000), SIMDE_FLOAT32_C(-89659.671875)}}, + {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, + {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, + {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), + SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-417438.093750), SIMDE_FLOAT32_C(417206.593750)}}, + {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, + {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, + 
{SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), + SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-33583.660156), SIMDE_FLOAT32_C(33966.328125)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot270_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot270_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -349,8 +423,7 @@ test_simde_vcmla_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -359,95 +432,153 @@ test_simde_vcmlaq_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; 
} test_vec[] = { - - { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), - SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, - { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), - SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 32992.00), SIMDE_FLOAT16_VALUE(-32592.00), SIMDE_FLOAT16_VALUE( -9224.00), SIMDE_FLOAT16_VALUE( 9824.00), - SIMDE_FLOAT16_VALUE( 15512.00), SIMDE_FLOAT16_VALUE(-16368.00), SIMDE_FLOAT16_VALUE( 22288.00), SIMDE_FLOAT16_VALUE(-22320.00) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), - SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, - { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), - SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, - { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -4448.00), SIMDE_FLOAT16_VALUE( 2868.00), SIMDE_FLOAT16_VALUE( 7244.00), SIMDE_FLOAT16_VALUE( -6208.00), - SIMDE_FLOAT16_VALUE( 28208.00), SIMDE_FLOAT16_VALUE(-29968.00), SIMDE_FLOAT16_VALUE( 3320.00), SIMDE_FLOAT16_VALUE( -3522.00) } }, - { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), - SIMDE_FLOAT16_VALUE( 831.00), 
SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, - { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), - SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 19280.00), SIMDE_FLOAT16_VALUE(-20416.00), SIMDE_FLOAT16_VALUE( 13656.00), SIMDE_FLOAT16_VALUE(-14888.00), - SIMDE_FLOAT16_VALUE( -9592.00), SIMDE_FLOAT16_VALUE( 11080.00), SIMDE_FLOAT16_VALUE( 7384.00), SIMDE_FLOAT16_VALUE( -7208.00) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), - SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, - { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), - SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, - { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-12696.00), SIMDE_FLOAT16_VALUE( 12008.00), SIMDE_FLOAT16_VALUE(-14984.00), SIMDE_FLOAT16_VALUE( 15200.00), - SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE( 21360.00), SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE( 29168.00) } }, - { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), - SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, - { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), 
SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), - SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, - { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( -2078.00), SIMDE_FLOAT16_VALUE( 1231.00), SIMDE_FLOAT16_VALUE( 8512.00), SIMDE_FLOAT16_VALUE( -9688.00), - SIMDE_FLOAT16_VALUE( -6960.00), SIMDE_FLOAT16_VALUE( 6000.00), SIMDE_FLOAT16_VALUE( 3292.00), SIMDE_FLOAT16_VALUE( -3614.00) } }, - { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), - SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, - { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), - SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, - { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -311.50), SIMDE_FLOAT16_VALUE( 117.25), SIMDE_FLOAT16_VALUE( -1865.00), SIMDE_FLOAT16_VALUE( 2496.00), - SIMDE_FLOAT16_VALUE( -7524.00), SIMDE_FLOAT16_VALUE( 7860.00), SIMDE_FLOAT16_VALUE( -1605.00), SIMDE_FLOAT16_VALUE( 174.75) } }, - { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), - SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, - { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), - SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, - 
{ SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE( 29440.00), SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE( 47168.00), - SIMDE_FLOAT16_VALUE( 16880.00), SIMDE_FLOAT16_VALUE(-16944.00), SIMDE_FLOAT16_VALUE( 6696.00), SIMDE_FLOAT16_VALUE( -6416.00) } }, - { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), - SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, - { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), - SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, - { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 12864.00), SIMDE_FLOAT16_VALUE(-13944.00), SIMDE_FLOAT16_VALUE( -6912.00), SIMDE_FLOAT16_VALUE( 7908.00), - SIMDE_FLOAT16_VALUE(-13744.00), SIMDE_FLOAT16_VALUE( 13360.00), SIMDE_FLOAT16_VALUE( -5964.00), SIMDE_FLOAT16_VALUE( 5044.00) } } - + + {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), + SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), + SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, + {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), + SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), + SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), + SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, + {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), + SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(32992.00), 
SIMDE_FLOAT16_VALUE(-32592.00), + SIMDE_FLOAT16_VALUE(-9224.00), SIMDE_FLOAT16_VALUE(9824.00), + SIMDE_FLOAT16_VALUE(15512.00), SIMDE_FLOAT16_VALUE(-16368.00), + SIMDE_FLOAT16_VALUE(22288.00), SIMDE_FLOAT16_VALUE(-22320.00)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), + SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, + {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-40.00), + SIMDE_FLOAT16_VALUE(-52.00), SIMDE_FLOAT16_VALUE(75.88), + SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), + SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(43.50)}, + {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), + SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-4448.00), SIMDE_FLOAT16_VALUE(2868.00), + SIMDE_FLOAT16_VALUE(7244.00), SIMDE_FLOAT16_VALUE(-6208.00), + SIMDE_FLOAT16_VALUE(28208.00), SIMDE_FLOAT16_VALUE(-29968.00), + SIMDE_FLOAT16_VALUE(3320.00), SIMDE_FLOAT16_VALUE(-3522.00)}}, + {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), + SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), + SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), + SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, + {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), + SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), + SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), + SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(19280.00), SIMDE_FLOAT16_VALUE(-20416.00), + SIMDE_FLOAT16_VALUE(13656.00), SIMDE_FLOAT16_VALUE(-14888.00), + SIMDE_FLOAT16_VALUE(-9592.00), SIMDE_FLOAT16_VALUE(11080.00), + SIMDE_FLOAT16_VALUE(7384.00), SIMDE_FLOAT16_VALUE(-7208.00)}}, + 
{{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), + SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), + SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), + SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, + {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), + SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), + SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), + SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, + {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), + SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-12696.00), SIMDE_FLOAT16_VALUE(12008.00), + SIMDE_FLOAT16_VALUE(-14984.00), SIMDE_FLOAT16_VALUE(15200.00), + SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(21360.00), + SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE(29168.00)}}, + {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), + SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), + SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), + SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, + {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), + SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), + SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), + SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, + {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), + SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-2078.00), SIMDE_FLOAT16_VALUE(1231.00), + SIMDE_FLOAT16_VALUE(8512.00), SIMDE_FLOAT16_VALUE(-9688.00), + SIMDE_FLOAT16_VALUE(-6960.00), SIMDE_FLOAT16_VALUE(6000.00), + SIMDE_FLOAT16_VALUE(3292.00), SIMDE_FLOAT16_VALUE(-3614.00)}}, + {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), + SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), + SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), + SIMDE_FLOAT16_VALUE(-680.00), 
SIMDE_FLOAT16_VALUE(-750.00)}, + {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), + SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), + SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), + SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, + {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), + SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-311.50), SIMDE_FLOAT16_VALUE(117.25), + SIMDE_FLOAT16_VALUE(-1865.00), SIMDE_FLOAT16_VALUE(2496.00), + SIMDE_FLOAT16_VALUE(-7524.00), SIMDE_FLOAT16_VALUE(7860.00), + SIMDE_FLOAT16_VALUE(-1605.00), SIMDE_FLOAT16_VALUE(174.75)}}, + {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), + SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), + SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), + SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, + {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), + SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, + {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), + SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE(29440.00), + SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(47168.00), + SIMDE_FLOAT16_VALUE(16880.00), SIMDE_FLOAT16_VALUE(-16944.00), + SIMDE_FLOAT16_VALUE(6696.00), SIMDE_FLOAT16_VALUE(-6416.00)}}, + {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), + SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), + SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), + SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, + {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), + SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), + SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), + 
SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, + {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), + SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(12864.00), SIMDE_FLOAT16_VALUE(-13944.00), + SIMDE_FLOAT16_VALUE(-6912.00), SIMDE_FLOAT16_VALUE(7908.00), + SIMDE_FLOAT16_VALUE(-13744.00), SIMDE_FLOAT16_VALUE(13360.00), + SIMDE_FLOAT16_VALUE(-5964.00), SIMDE_FLOAT16_VALUE(5044.00)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_rot270_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - + SIMDE_CONSTIFY_2_( + simde_vcmlaq_rot270_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -458,8 +589,7 @@ 
test_simde_vcmlaq_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -468,69 +598,91 @@ test_simde_vcmlaq_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, - { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, - { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-750463.375000), SIMDE_FLOAT32_C(751255.687500), SIMDE_FLOAT32_C(193276.718750), SIMDE_FLOAT32_C(-194759.546875) } }, - { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, - { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, - { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(1763.281738), SIMDE_FLOAT32_C(-1649.681763), SIMDE_FLOAT32_C(-90054.617188), SIMDE_FLOAT32_C(89508.742188) } }, - { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, - { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, - { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-682739.875000), SIMDE_FLOAT32_C(683590.000000), SIMDE_FLOAT32_C(561822.437500), SIMDE_FLOAT32_C(-563218.187500) } }, - { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, - { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, - { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, - 
INT32_C( 0), - { SIMDE_FLOAT32_C(110646.710938), SIMDE_FLOAT32_C(-111760.718750), SIMDE_FLOAT32_C(-120219.195312), SIMDE_FLOAT32_C(119327.898438) } }, - { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, - { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, - { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-14330.849609), SIMDE_FLOAT32_C(14417.729492), SIMDE_FLOAT32_C(10193.823242), SIMDE_FLOAT32_C(-8453.263672) } }, - { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, - { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, - { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(698546.187500), SIMDE_FLOAT32_C(-698628.187500), SIMDE_FLOAT32_C(50247.367188), SIMDE_FLOAT32_C(-51198.714844) } }, - { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, - { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, - { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-78157.531250), SIMDE_FLOAT32_C(78774.859375), SIMDE_FLOAT32_C(4187.824707), SIMDE_FLOAT32_C(-4100.624512) } }, - { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, - { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, - { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(351994.031250), SIMDE_FLOAT32_C(-352372.625000), SIMDE_FLOAT32_C(-347034.812500), SIMDE_FLOAT32_C(347561.687500) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(301.65), 
SIMDE_FLOAT32_C(490.71), + SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, + {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), + SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, + {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-750463.375000), SIMDE_FLOAT32_C(751255.687500), + SIMDE_FLOAT32_C(193276.718750), SIMDE_FLOAT32_C(-194759.546875)}}, + {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), + SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, + {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), + SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, + {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, + INT32_C(0), + {SIMDE_FLOAT32_C(1763.281738), SIMDE_FLOAT32_C(-1649.681763), + SIMDE_FLOAT32_C(-90054.617188), SIMDE_FLOAT32_C(89508.742188)}}, + {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), + SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, + {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), + SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, + {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-682739.875000), SIMDE_FLOAT32_C(683590.000000), + SIMDE_FLOAT32_C(561822.437500), SIMDE_FLOAT32_C(-563218.187500)}}, + {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), + SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, + {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), + SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, + {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(110646.710938), SIMDE_FLOAT32_C(-111760.718750), + SIMDE_FLOAT32_C(-120219.195312), SIMDE_FLOAT32_C(119327.898438)}}, + {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), + SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, + {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), + SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, + {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-14330.849609), 
SIMDE_FLOAT32_C(14417.729492), + SIMDE_FLOAT32_C(10193.823242), SIMDE_FLOAT32_C(-8453.263672)}}, + {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), + SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, + {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), + SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, + {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, + INT32_C(0), + {SIMDE_FLOAT32_C(698546.187500), SIMDE_FLOAT32_C(-698628.187500), + SIMDE_FLOAT32_C(50247.367188), SIMDE_FLOAT32_C(-51198.714844)}}, + {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), + SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, + {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), + SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, + {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-78157.531250), SIMDE_FLOAT32_C(78774.859375), + SIMDE_FLOAT32_C(4187.824707), SIMDE_FLOAT32_C(-4100.624512)}}, + {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), + SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, + {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), + SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, + {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, + INT32_C(0), + {SIMDE_FLOAT32_C(351994.031250), SIMDE_FLOAT32_C(-352372.625000), + SIMDE_FLOAT32_C(-347034.812500), SIMDE_FLOAT32_C(347561.687500)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot270_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 
; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -541,8 +693,7 @@ test_simde_vcmlaq_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -551,102 +702,167 @@ test_simde_vcmlaq_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), - SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, - { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), - SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, - { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), - SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 31024.00), SIMDE_FLOAT16_VALUE(-30432.00), SIMDE_FLOAT16_VALUE( 56384.00), SIMDE_FLOAT16_VALUE(-55808.00), - SIMDE_FLOAT16_VALUE(-14528.00), SIMDE_FLOAT16_VALUE( 15520.00), SIMDE_FLOAT16_VALUE( 
14728.00), SIMDE_FLOAT16_VALUE(-15344.00) } }, - { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), - SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, - { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), - SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, - { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), - SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -5848.00), SIMDE_FLOAT16_VALUE( 6660.00), SIMDE_FLOAT16_VALUE(-47776.00), SIMDE_FLOAT16_VALUE( 47296.00), - SIMDE_FLOAT16_VALUE( 5920.00), SIMDE_FLOAT16_VALUE( -6688.00), SIMDE_FLOAT16_VALUE( -7976.00), SIMDE_FLOAT16_VALUE( 7160.00) } }, - { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), - SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, - { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), - SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, - { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), - SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE( 1708.00), SIMDE_FLOAT16_VALUE( -2544.00), SIMDE_FLOAT16_VALUE( -9592.00), SIMDE_FLOAT16_VALUE( 9160.00), - 
SIMDE_FLOAT16_VALUE( 13560.00), SIMDE_FLOAT16_VALUE(-13088.00), SIMDE_FLOAT16_VALUE( 9080.00), SIMDE_FLOAT16_VALUE( -8376.00) } }, - { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), - SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, - { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), - SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, - { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), - SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE( 25952.00), SIMDE_FLOAT16_VALUE(-27136.00), SIMDE_FLOAT16_VALUE( 19760.00), SIMDE_FLOAT16_VALUE(-21008.00), - SIMDE_FLOAT16_VALUE( 19408.00), SIMDE_FLOAT16_VALUE(-19648.00), SIMDE_FLOAT16_VALUE(-22720.00), SIMDE_FLOAT16_VALUE( 23504.00) } }, - { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), - SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, - { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), - SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, - { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), - SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 20016.00), 
SIMDE_FLOAT16_VALUE(-20176.00), SIMDE_FLOAT16_VALUE(-16928.00), SIMDE_FLOAT16_VALUE( 16912.00), - SIMDE_FLOAT16_VALUE( -8096.00), SIMDE_FLOAT16_VALUE( 8696.00), SIMDE_FLOAT16_VALUE( 15264.00), SIMDE_FLOAT16_VALUE(-16296.00) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), - SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, - { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), - SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, - { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), - SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 7588.00), SIMDE_FLOAT16_VALUE( -8040.00), SIMDE_FLOAT16_VALUE( 5176.00), SIMDE_FLOAT16_VALUE( -5404.00), - SIMDE_FLOAT16_VALUE(-10008.00), SIMDE_FLOAT16_VALUE( 10136.00), SIMDE_FLOAT16_VALUE( 5884.00), SIMDE_FLOAT16_VALUE( -5212.00) } }, - { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), - SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, - { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), - SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, - { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), - SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), 
SIMDE_FLOAT16_VALUE( 148.25) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE(-20272.00), SIMDE_FLOAT16_VALUE( 18976.00), SIMDE_FLOAT16_VALUE( -9352.00), SIMDE_FLOAT16_VALUE( 8456.00), - SIMDE_FLOAT16_VALUE( 20848.00), SIMDE_FLOAT16_VALUE(-20016.00), SIMDE_FLOAT16_VALUE(-21536.00), SIMDE_FLOAT16_VALUE( 22720.00) } }, - { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), - SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, - { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), - SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, - { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), - SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE(-32080.00), SIMDE_FLOAT16_VALUE( 32624.00), SIMDE_FLOAT16_VALUE(-20320.00), SIMDE_FLOAT16_VALUE( 20560.00), - SIMDE_FLOAT16_VALUE(-34272.00), SIMDE_FLOAT16_VALUE( 33824.00), SIMDE_FLOAT16_VALUE(-43872.00), SIMDE_FLOAT16_VALUE( 44544.00) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), + SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), + SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), + SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, + {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), + SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), + SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), + SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, + {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), + 
SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), + SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), + SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(31024.00), SIMDE_FLOAT16_VALUE(-30432.00), + SIMDE_FLOAT16_VALUE(56384.00), SIMDE_FLOAT16_VALUE(-55808.00), + SIMDE_FLOAT16_VALUE(-14528.00), SIMDE_FLOAT16_VALUE(15520.00), + SIMDE_FLOAT16_VALUE(14728.00), SIMDE_FLOAT16_VALUE(-15344.00)}}, + {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), + SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), + SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), + SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, + {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), + SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(79.00), SIMDE_FLOAT16_VALUE(84.00), + SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-96.00)}, + {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), + SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), + SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), + SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-5848.00), SIMDE_FLOAT16_VALUE(6660.00), + SIMDE_FLOAT16_VALUE(-47776.00), SIMDE_FLOAT16_VALUE(47296.00), + SIMDE_FLOAT16_VALUE(5920.00), SIMDE_FLOAT16_VALUE(-6688.00), + SIMDE_FLOAT16_VALUE(-7976.00), SIMDE_FLOAT16_VALUE(7160.00)}}, + {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), + SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), + SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), + SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, + {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), + SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), + SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), + SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, + {SIMDE_FLOAT16_VALUE(-523.00), 
SIMDE_FLOAT16_VALUE(-720.00), + SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), + SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), + SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(1708.00), SIMDE_FLOAT16_VALUE(-2544.00), + SIMDE_FLOAT16_VALUE(-9592.00), SIMDE_FLOAT16_VALUE(9160.00), + SIMDE_FLOAT16_VALUE(13560.00), SIMDE_FLOAT16_VALUE(-13088.00), + SIMDE_FLOAT16_VALUE(9080.00), SIMDE_FLOAT16_VALUE(-8376.00)}}, + {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), + SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), + SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), + SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, + {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), + SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), + SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), + SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, + {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), + SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), + SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), + SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(25952.00), SIMDE_FLOAT16_VALUE(-27136.00), + SIMDE_FLOAT16_VALUE(19760.00), SIMDE_FLOAT16_VALUE(-21008.00), + SIMDE_FLOAT16_VALUE(19408.00), SIMDE_FLOAT16_VALUE(-19648.00), + SIMDE_FLOAT16_VALUE(-22720.00), SIMDE_FLOAT16_VALUE(23504.00)}}, + {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), + SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), + SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), + SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, + {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), + SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), + SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), + SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, + 
{SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), + SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), + SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), + SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(20016.00), SIMDE_FLOAT16_VALUE(-20176.00), + SIMDE_FLOAT16_VALUE(-16928.00), SIMDE_FLOAT16_VALUE(16912.00), + SIMDE_FLOAT16_VALUE(-8096.00), SIMDE_FLOAT16_VALUE(8696.00), + SIMDE_FLOAT16_VALUE(15264.00), SIMDE_FLOAT16_VALUE(-16296.00)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), + SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), + SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), + SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, + {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), + SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), + SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), + SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, + {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), + SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), + SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), + SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(7588.00), SIMDE_FLOAT16_VALUE(-8040.00), + SIMDE_FLOAT16_VALUE(5176.00), SIMDE_FLOAT16_VALUE(-5404.00), + SIMDE_FLOAT16_VALUE(-10008.00), SIMDE_FLOAT16_VALUE(10136.00), + SIMDE_FLOAT16_VALUE(5884.00), SIMDE_FLOAT16_VALUE(-5212.00)}}, + {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), + SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), + SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), + SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, + {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), + SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), + SIMDE_FLOAT16_VALUE(-802.00), 
SIMDE_FLOAT16_VALUE(966.50)}, + {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), + SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), + SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), + SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(-20272.00), SIMDE_FLOAT16_VALUE(18976.00), + SIMDE_FLOAT16_VALUE(-9352.00), SIMDE_FLOAT16_VALUE(8456.00), + SIMDE_FLOAT16_VALUE(20848.00), SIMDE_FLOAT16_VALUE(-20016.00), + SIMDE_FLOAT16_VALUE(-21536.00), SIMDE_FLOAT16_VALUE(22720.00)}}, + {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), + SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), + SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, + {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), + SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), + SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), + SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, + {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), + SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), + SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), + SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(-32080.00), SIMDE_FLOAT16_VALUE(32624.00), + SIMDE_FLOAT16_VALUE(-20320.00), SIMDE_FLOAT16_VALUE(20560.00), + SIMDE_FLOAT16_VALUE(-34272.00), SIMDE_FLOAT16_VALUE(33824.00), + SIMDE_FLOAT16_VALUE(-43872.00), SIMDE_FLOAT16_VALUE(44544.00)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - - simde_float16x8_t r; - SIMDE_CONSTIFY_4_(simde_vcmlaq_rot270_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - 
simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - + + simde_float16x8_t r; + SIMDE_CONSTIFY_4_( + simde_vcmlaq_rot270_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -657,8 +873,7 @@ test_simde_vcmlaq_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -667,70 +882,103 @@ test_simde_vcmlaq_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, - { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, - { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(839681.750000), SIMDE_FLOAT32_C(-839156.937500), SIMDE_FLOAT32_C(782495.375000), SIMDE_FLOAT32_C(-782845.687500) } }, - { { SIMDE_FLOAT32_C( -57.67), 
SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, - { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, - { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(82345.062500), SIMDE_FLOAT32_C(-83299.804688), SIMDE_FLOAT32_C(105892.781250), SIMDE_FLOAT32_C(-106161.718750) } }, - { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, - { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, - { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(271323.000000), SIMDE_FLOAT32_C(-272501.656250), SIMDE_FLOAT32_C(-439964.968750), SIMDE_FLOAT32_C(441537.375000) } }, - { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, - { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, - { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(736097.062500), SIMDE_FLOAT32_C(-736041.000000), SIMDE_FLOAT32_C(-755055.625000), SIMDE_FLOAT32_C(754533.500000) } }, - { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, - { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, - { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-452097.250000), SIMDE_FLOAT32_C(453633.968750), SIMDE_FLOAT32_C(338006.656250), SIMDE_FLOAT32_C(-338632.281250) } }, - { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( 
-894.22), SIMDE_FLOAT32_C( -854.38) }, - { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, - { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-590092.937500), SIMDE_FLOAT32_C(590752.500000), SIMDE_FLOAT32_C(-167685.140625), SIMDE_FLOAT32_C(165936.531250) } }, - { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, - { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, - { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-241310.093750), SIMDE_FLOAT32_C(241164.921875), SIMDE_FLOAT32_C(25459.757812), SIMDE_FLOAT32_C(-26348.677734) } }, - { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, - { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, - { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-126452.203125), SIMDE_FLOAT32_C(126796.023438), SIMDE_FLOAT32_C(-140046.640625), SIMDE_FLOAT32_C(139280.625000) } } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), + SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, + {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), + SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, + {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), + SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(839681.750000), SIMDE_FLOAT32_C(-839156.937500), + SIMDE_FLOAT32_C(782495.375000), SIMDE_FLOAT32_C(-782845.687500)}}, + {{SIMDE_FLOAT32_C(-57.67), 
SIMDE_FLOAT32_C(-897.07), + SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, + {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), + SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, + {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), + SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, + INT32_C(1), + {SIMDE_FLOAT32_C(82345.062500), SIMDE_FLOAT32_C(-83299.804688), + SIMDE_FLOAT32_C(105892.781250), SIMDE_FLOAT32_C(-106161.718750)}}, + {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), + SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, + {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), + SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, + {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), + SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, + INT32_C(0), + {SIMDE_FLOAT32_C(271323.000000), SIMDE_FLOAT32_C(-272501.656250), + SIMDE_FLOAT32_C(-439964.968750), SIMDE_FLOAT32_C(441537.375000)}}, + {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), + SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, + {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), + SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, + {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), + SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, + INT32_C(1), + {SIMDE_FLOAT32_C(736097.062500), SIMDE_FLOAT32_C(-736041.000000), + SIMDE_FLOAT32_C(-755055.625000), SIMDE_FLOAT32_C(754533.500000)}}, + {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), + SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, + {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), + SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, + {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), + SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-452097.250000), SIMDE_FLOAT32_C(453633.968750), + SIMDE_FLOAT32_C(338006.656250), SIMDE_FLOAT32_C(-338632.281250)}}, + {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), + SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, + 
{SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), + SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, + {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), + SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-590092.937500), SIMDE_FLOAT32_C(590752.500000), + SIMDE_FLOAT32_C(-167685.140625), SIMDE_FLOAT32_C(165936.531250)}}, + {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), + SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, + {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), + SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, + {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), + SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-241310.093750), SIMDE_FLOAT32_C(241164.921875), + SIMDE_FLOAT32_C(25459.757812), SIMDE_FLOAT32_C(-26348.677734)}}, + {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), + SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, + {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), + SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, + {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), + SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-126452.203125), SIMDE_FLOAT32_C(126796.023438), + SIMDE_FLOAT32_C(-140046.640625), SIMDE_FLOAT32_C(139280.625000)}}}; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_rot270_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + SIMDE_CONSTIFY_2_( + simde_vcmlaq_rot270_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); + 
simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); - } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_rot270_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); diff --git a/test/arm/neon/cmla_rot90_lane.c b/test/arm/neon/cmla_rot90_lane.c index 522fd12b1..37812cf17 100644 --- a/test/arm/neon/cmla_rot90_lane.c +++ b/test/arm/neon/cmla_rot90_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_rot90_lane -#include "test-neon.h" #include "../../../simde/arm/neon/cmla_rot90_lane.h" + #include "../../../simde/arm/neon/dup_n.h" +#include "test-neon.h" -static int -test_simde_vcmla_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ -14,73 +14,107 @@ test_simde_vcmla_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, - { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, - INT32_C( 0), - { 
SIMDE_FLOAT16_VALUE( -487.75), SIMDE_FLOAT16_VALUE( 329.50), SIMDE_FLOAT16_VALUE( 711.50), SIMDE_FLOAT16_VALUE( -1905.00) } }, - { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 1593.00), SIMDE_FLOAT16_VALUE( -760.00), SIMDE_FLOAT16_VALUE(-44736.00), SIMDE_FLOAT16_VALUE( 45248.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-59136.00), SIMDE_FLOAT16_VALUE( 58496.00), SIMDE_FLOAT16_VALUE( 28944.00), SIMDE_FLOAT16_VALUE(-29152.00) } }, - { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, - { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-30800.00), SIMDE_FLOAT16_VALUE( 30688.00), SIMDE_FLOAT16_VALUE( 17888.00), SIMDE_FLOAT16_VALUE(-18208.00) } }, - { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, - { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, - { 
SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-15688.00), SIMDE_FLOAT16_VALUE( 15856.00), SIMDE_FLOAT16_VALUE(-42656.00), SIMDE_FLOAT16_VALUE( 42656.00) } }, - { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, - { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, - { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 1813.00), SIMDE_FLOAT16_VALUE( -1920.00), SIMDE_FLOAT16_VALUE(-46528.00), SIMDE_FLOAT16_VALUE( 46752.00) } }, - { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, - { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-14712.00), SIMDE_FLOAT16_VALUE( 14672.00), SIMDE_FLOAT16_VALUE( -1574.00), SIMDE_FLOAT16_VALUE( 1653.00) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, - { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, - { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 13160.00), SIMDE_FLOAT16_VALUE(-14744.00), SIMDE_FLOAT16_VALUE(-11400.00), SIMDE_FLOAT16_VALUE( 12432.00) } } - + {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), + SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, + 
{SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), + SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-487.75), SIMDE_FLOAT16_VALUE(329.50), + SIMDE_FLOAT16_VALUE(711.50), SIMDE_FLOAT16_VALUE(-1905.00)}}, + {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-14.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(61.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(1593.00), SIMDE_FLOAT16_VALUE(-760.00), + SIMDE_FLOAT16_VALUE(-44736.00), SIMDE_FLOAT16_VALUE(45248.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-61.00), SIMDE_FLOAT16_VALUE(185.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-59136.00), SIMDE_FLOAT16_VALUE(58496.00), + SIMDE_FLOAT16_VALUE(28944.00), SIMDE_FLOAT16_VALUE(-29152.00)}}, + {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), + SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, + {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), + SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), + SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-30800.00), SIMDE_FLOAT16_VALUE(30688.00), + SIMDE_FLOAT16_VALUE(17888.00), SIMDE_FLOAT16_VALUE(-18208.00)}}, + {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), + 
SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, + {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), + SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, + {SIMDE_FLOAT16_VALUE(-80.50), SIMDE_FLOAT16_VALUE(375.50), + SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-15688.00), SIMDE_FLOAT16_VALUE(15856.00), + SIMDE_FLOAT16_VALUE(-42656.00), SIMDE_FLOAT16_VALUE(42656.00)}}, + {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, + {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), + SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, + {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), + SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(1813.00), SIMDE_FLOAT16_VALUE(-1920.00), + SIMDE_FLOAT16_VALUE(-46528.00), SIMDE_FLOAT16_VALUE(46752.00)}}, + {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, + {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-14712.00), SIMDE_FLOAT16_VALUE(14672.00), + SIMDE_FLOAT16_VALUE(-1574.00), SIMDE_FLOAT16_VALUE(1653.00)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, + {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, + {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), + SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(13160.00), SIMDE_FLOAT16_VALUE(-14744.00), + SIMDE_FLOAT16_VALUE(-11400.00), SIMDE_FLOAT16_VALUE(12432.00)}} + }; - for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - // simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, test_vec[i].lane); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); + // simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -91,8 +125,7 @@ test_simde_vcmla_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -101,69 +134,68 @@ test_simde_vcmla_rot90_lane_f32 
(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, - { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, - { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(526255.250000), SIMDE_FLOAT32_C(-525754.125000) } }, - { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, - { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, - { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(48732.140625), SIMDE_FLOAT32_C(-48972.390625) } }, - { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, - { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, - { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(449656.687500), SIMDE_FLOAT32_C(-449956.781250) } }, - { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, - { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, - { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-55698.687500), SIMDE_FLOAT32_C(55384.136719) } }, - { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, - { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, - { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(12792.236328), SIMDE_FLOAT32_C(-13138.096680) } }, - { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, - { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, - { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-33040.417969), SIMDE_FLOAT32_C(33323.867188) } }, - { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, - { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, - { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-313060.062500), SIMDE_FLOAT32_C(312210.093750) } }, - { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, - { 
SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, - { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-181416.578125), SIMDE_FLOAT32_C(180989.468750) } } - }; + {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, + {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, + {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, + INT32_C(0), + {SIMDE_FLOAT32_C(526255.250000), SIMDE_FLOAT32_C(-525754.125000)}}, + {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, + {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, + {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, + INT32_C(0), + {SIMDE_FLOAT32_C(48732.140625), SIMDE_FLOAT32_C(-48972.390625)}}, + {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, + {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, + {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, + INT32_C(0), + {SIMDE_FLOAT32_C(449656.687500), SIMDE_FLOAT32_C(-449956.781250)}}, + {{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, + {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, + {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-55698.687500), SIMDE_FLOAT32_C(55384.136719)}}, + {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, + {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, + {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, + INT32_C(0), + {SIMDE_FLOAT32_C(12792.236328), SIMDE_FLOAT32_C(-13138.096680)}}, + {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, + {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, + {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-33040.417969), SIMDE_FLOAT32_C(33323.867188)}}, + {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, + {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, + {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-313060.062500), SIMDE_FLOAT32_C(312210.093750)}}, + {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, + 
{SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, + {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-181416.578125), SIMDE_FLOAT32_C(180989.468750)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, 0); - // SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + // SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), + // simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -174,9 +206,7 @@ test_simde_vcmla_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } - -static int -test_simde_vcmla_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[4]; @@ 
-185,79 +215,121 @@ test_simde_vcmla_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, - { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, - { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), - SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-10760.00), SIMDE_FLOAT16_VALUE( 12344.00), SIMDE_FLOAT16_VALUE( 22960.00), SIMDE_FLOAT16_VALUE(-22800.00) } }, - { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, - { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, - { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), - SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -4952.00), SIMDE_FLOAT16_VALUE( 5132.00), SIMDE_FLOAT16_VALUE( -2424.00), SIMDE_FLOAT16_VALUE( 3010.00) } }, - { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, - { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, - { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), - SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 
4844.00), SIMDE_FLOAT16_VALUE( -5432.00), SIMDE_FLOAT16_VALUE( -7100.00), SIMDE_FLOAT16_VALUE( 7316.00) } }, - { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, - { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, - { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), - SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 5440.00), SIMDE_FLOAT16_VALUE( -6080.00), SIMDE_FLOAT16_VALUE(-39584.00), SIMDE_FLOAT16_VALUE( 39552.00) } }, - { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, - { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, - { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), - SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 9408.00), SIMDE_FLOAT16_VALUE( -9792.00), SIMDE_FLOAT16_VALUE( -5892.00), SIMDE_FLOAT16_VALUE( 6252.00) } }, - { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, - { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, - { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), - SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 15872.00), 
SIMDE_FLOAT16_VALUE(-15096.00), SIMDE_FLOAT16_VALUE( 27792.00), SIMDE_FLOAT16_VALUE(-28256.00) } }, - { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, - { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, - { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), - SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 26032.00), SIMDE_FLOAT16_VALUE(-25536.00), SIMDE_FLOAT16_VALUE( 19392.00), SIMDE_FLOAT16_VALUE(-19440.00) } }, - { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, - { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, - { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), - SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 19088.00), SIMDE_FLOAT16_VALUE(-18048.00), SIMDE_FLOAT16_VALUE( -8044.00), SIMDE_FLOAT16_VALUE( 7728.00) } } - }; + {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), + SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, + {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), + SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, + {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), + SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), + SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), + SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-10760.00), SIMDE_FLOAT16_VALUE(12344.00), + 
SIMDE_FLOAT16_VALUE(22960.00), SIMDE_FLOAT16_VALUE(-22800.00)}}, + {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), + SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, + {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), + SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, + {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), + SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), + SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), + SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-4952.00), SIMDE_FLOAT16_VALUE(5132.00), + SIMDE_FLOAT16_VALUE(-2424.00), SIMDE_FLOAT16_VALUE(3010.00)}}, + {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), + SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, + {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), + SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, + {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), + SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), + SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), + SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(4844.00), SIMDE_FLOAT16_VALUE(-5432.00), + SIMDE_FLOAT16_VALUE(-7100.00), SIMDE_FLOAT16_VALUE(7316.00)}}, + {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), + SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-44.50)}, + {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-66.00), + SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, + {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(85.62), + SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), + SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(5440.00), SIMDE_FLOAT16_VALUE(-6080.00), + SIMDE_FLOAT16_VALUE(-39584.00), SIMDE_FLOAT16_VALUE(39552.00)}}, + 
{{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), + SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, + {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), + SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, + {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), + SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), + SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), + SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(9408.00), SIMDE_FLOAT16_VALUE(-9792.00), + SIMDE_FLOAT16_VALUE(-5892.00), SIMDE_FLOAT16_VALUE(6252.00)}}, + {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), + SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, + {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), + SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, + {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), + SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), + SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), + SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(15872.00), SIMDE_FLOAT16_VALUE(-15096.00), + SIMDE_FLOAT16_VALUE(27792.00), SIMDE_FLOAT16_VALUE(-28256.00)}}, + {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), + SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, + {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), + SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, + {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), + SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), + SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), + SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(26032.00), SIMDE_FLOAT16_VALUE(-25536.00), + SIMDE_FLOAT16_VALUE(19392.00), SIMDE_FLOAT16_VALUE(-19440.00)}}, + {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), + 
SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, + {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), + SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, + {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), + SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), + SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), + SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(19088.00), SIMDE_FLOAT16_VALUE(-18048.00), + SIMDE_FLOAT16_VALUE(-8044.00), SIMDE_FLOAT16_VALUE(7728.00)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x4_t r; - // simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); - SIMDE_CONSTIFY_2_(simde_vcmla_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + // simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x4(2, r, + // SIMDE_TEST_VEC_POS_LAST); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = 
simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -268,8 +340,7 @@ test_simde_vcmla_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmla_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmla_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -278,71 +349,81 @@ test_simde_vcmla_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, - { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, - { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-611578.125000), SIMDE_FLOAT32_C(611189.750000) } }, - { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, - { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, - { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(5702.652344), SIMDE_FLOAT32_C(-6640.392578) } }, - { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, - { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, - { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(36856.097656), SIMDE_FLOAT32_C(-36254.929688) } }, - { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, - { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, - { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(237778.359375), 
SIMDE_FLOAT32_C(-236252.765625) } }, - { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, - { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, - { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-104517.312500), SIMDE_FLOAT32_C(104795.734375) } }, - { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, - { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, - { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-89676.710938), SIMDE_FLOAT32_C(90485.789062) } }, - { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, - { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, - { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(417134.156250), SIMDE_FLOAT32_C(-417365.687500) } }, - { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, - { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, - { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(33791.917969), SIMDE_FLOAT32_C(-33409.250000) } } - }; + {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, + {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, + {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), + SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-611578.125000), SIMDE_FLOAT32_C(611189.750000)}}, + {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, + {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, + {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), + SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, + INT32_C(1), + {SIMDE_FLOAT32_C(5702.652344), SIMDE_FLOAT32_C(-6640.392578)}}, + {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, + {SIMDE_FLOAT32_C(850.75), 
SIMDE_FLOAT32_C(-263.55)}, + {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), + SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, + INT32_C(0), + {SIMDE_FLOAT32_C(36856.097656), SIMDE_FLOAT32_C(-36254.929688)}}, + {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, + {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, + {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), + SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, + INT32_C(1), + {SIMDE_FLOAT32_C(237778.359375), SIMDE_FLOAT32_C(-236252.765625)}}, + {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, + {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, + {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), + SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-104517.312500), SIMDE_FLOAT32_C(104795.734375)}}, + {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, + {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, + {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), + SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-89676.710938), SIMDE_FLOAT32_C(90485.789062)}}, + {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, + {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, + {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), + SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, + INT32_C(0), + {SIMDE_FLOAT32_C(417134.156250), SIMDE_FLOAT32_C(-417365.687500)}}, + {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, + {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, + {SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), + SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, + INT32_C(1), + {SIMDE_FLOAT32_C(33791.917969), SIMDE_FLOAT32_C(-33409.250000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = 
simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_(simde_vcmla_rot90_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + // simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f32x2(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -353,8 +434,7 @@ test_simde_vcmla_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -363,97 +443,157 @@ test_simde_vcmlaq_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - - { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), - 
SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, - { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), - SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, - { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-32544.00), SIMDE_FLOAT16_VALUE( 32960.00), SIMDE_FLOAT16_VALUE( 9592.00), SIMDE_FLOAT16_VALUE( -8984.00), - SIMDE_FLOAT16_VALUE(-16912.00), SIMDE_FLOAT16_VALUE( 16064.00), SIMDE_FLOAT16_VALUE(-23056.00), SIMDE_FLOAT16_VALUE( 23024.00) } }, - { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), - SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, - { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), - SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, - { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 2708.00), SIMDE_FLOAT16_VALUE( -4288.00), SIMDE_FLOAT16_VALUE( -6328.00), SIMDE_FLOAT16_VALUE( 7364.00), - SIMDE_FLOAT16_VALUE(-29792.00), SIMDE_FLOAT16_VALUE( 28032.00), SIMDE_FLOAT16_VALUE( -4460.00), SIMDE_FLOAT16_VALUE( 4260.00) } }, - { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), - SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, - { SIMDE_FLOAT16_VALUE( 535.50), 
SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), - SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, - { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE( 20096.00), SIMDE_FLOAT16_VALUE(-14408.00), SIMDE_FLOAT16_VALUE( 13176.00), - SIMDE_FLOAT16_VALUE( 11256.00), SIMDE_FLOAT16_VALUE( -9760.00), SIMDE_FLOAT16_VALUE( -8816.00), SIMDE_FLOAT16_VALUE( 8992.00) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), - SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, - { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), - SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, - { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 11456.00), SIMDE_FLOAT16_VALUE(-12152.00), SIMDE_FLOAT16_VALUE( 13792.00), SIMDE_FLOAT16_VALUE(-13576.00), - SIMDE_FLOAT16_VALUE( 21408.00), SIMDE_FLOAT16_VALUE(-19712.00), SIMDE_FLOAT16_VALUE( 29168.00), SIMDE_FLOAT16_VALUE(-30320.00) } }, - { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), - SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, - { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), - SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), 
SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, - { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 1032.00), SIMDE_FLOAT16_VALUE( -1878.00), SIMDE_FLOAT16_VALUE(-10288.00), SIMDE_FLOAT16_VALUE( 9120.00), - SIMDE_FLOAT16_VALUE( 6724.00), SIMDE_FLOAT16_VALUE( -7684.00), SIMDE_FLOAT16_VALUE( -1961.00), SIMDE_FLOAT16_VALUE( 1639.00) } }, - { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), - SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, - { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), - SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, - { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -567.50), SIMDE_FLOAT16_VALUE( 373.00), SIMDE_FLOAT16_VALUE( 2088.00), SIMDE_FLOAT16_VALUE( -1456.00), - SIMDE_FLOAT16_VALUE( 7692.00), SIMDE_FLOAT16_VALUE( -7356.00), SIMDE_FLOAT16_VALUE( 244.75), SIMDE_FLOAT16_VALUE( -1675.00) } }, - { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), - SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, - { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), - SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, - { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) 
}, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE( 30416.00), SIMDE_FLOAT16_VALUE(-31296.00), SIMDE_FLOAT16_VALUE( 48448.00), SIMDE_FLOAT16_VALUE(-49024.00), - SIMDE_FLOAT16_VALUE(-16512.00), SIMDE_FLOAT16_VALUE( 16464.00), SIMDE_FLOAT16_VALUE( -4848.00), SIMDE_FLOAT16_VALUE( 5128.00) } }, - { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), - SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, - { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), - SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, - { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE(-14320.00), SIMDE_FLOAT16_VALUE( 13240.00), SIMDE_FLOAT16_VALUE( 7448.00), SIMDE_FLOAT16_VALUE( -6452.00), - SIMDE_FLOAT16_VALUE( 12808.00), SIMDE_FLOAT16_VALUE(-13192.00), SIMDE_FLOAT16_VALUE( 6108.00), SIMDE_FLOAT16_VALUE( -7028.00) } } - + + {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), + SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), + SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), + SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, + {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), + SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), + SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), + SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, + {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), + SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-32544.00), SIMDE_FLOAT16_VALUE(32960.00), + SIMDE_FLOAT16_VALUE(9592.00), SIMDE_FLOAT16_VALUE(-8984.00), + SIMDE_FLOAT16_VALUE(-16912.00), 
SIMDE_FLOAT16_VALUE(16064.00), + SIMDE_FLOAT16_VALUE(-23056.00), SIMDE_FLOAT16_VALUE(23024.00)}}, + {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), + SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), + SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, + {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-40.00), + SIMDE_FLOAT16_VALUE(-52.00), SIMDE_FLOAT16_VALUE(75.88), + SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), + SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(43.50)}, + {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), + SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(2708.00), SIMDE_FLOAT16_VALUE(-4288.00), + SIMDE_FLOAT16_VALUE(-6328.00), SIMDE_FLOAT16_VALUE(7364.00), + SIMDE_FLOAT16_VALUE(-29792.00), SIMDE_FLOAT16_VALUE(28032.00), + SIMDE_FLOAT16_VALUE(-4460.00), SIMDE_FLOAT16_VALUE(4260.00)}}, + {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), + SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), + SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), + SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, + {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), + SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), + SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), + SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, + {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), + SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE(20096.00), + SIMDE_FLOAT16_VALUE(-14408.00), SIMDE_FLOAT16_VALUE(13176.00), + SIMDE_FLOAT16_VALUE(11256.00), SIMDE_FLOAT16_VALUE(-9760.00), + SIMDE_FLOAT16_VALUE(-8816.00), SIMDE_FLOAT16_VALUE(8992.00)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), + SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), + 
SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), + SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, + {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), + SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), + SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), + SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, + {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), + SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(11456.00), SIMDE_FLOAT16_VALUE(-12152.00), + SIMDE_FLOAT16_VALUE(13792.00), SIMDE_FLOAT16_VALUE(-13576.00), + SIMDE_FLOAT16_VALUE(21408.00), SIMDE_FLOAT16_VALUE(-19712.00), + SIMDE_FLOAT16_VALUE(29168.00), SIMDE_FLOAT16_VALUE(-30320.00)}}, + {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), + SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), + SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), + SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, + {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), + SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), + SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), + SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, + {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), + SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(1032.00), SIMDE_FLOAT16_VALUE(-1878.00), + SIMDE_FLOAT16_VALUE(-10288.00), SIMDE_FLOAT16_VALUE(9120.00), + SIMDE_FLOAT16_VALUE(6724.00), SIMDE_FLOAT16_VALUE(-7684.00), + SIMDE_FLOAT16_VALUE(-1961.00), SIMDE_FLOAT16_VALUE(1639.00)}}, + {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), + SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), + SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), + SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, + {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), + SIMDE_FLOAT16_VALUE(-921.50), 
SIMDE_FLOAT16_VALUE(225.88), + SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), + SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, + {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), + SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-567.50), SIMDE_FLOAT16_VALUE(373.00), + SIMDE_FLOAT16_VALUE(2088.00), SIMDE_FLOAT16_VALUE(-1456.00), + SIMDE_FLOAT16_VALUE(7692.00), SIMDE_FLOAT16_VALUE(-7356.00), + SIMDE_FLOAT16_VALUE(244.75), SIMDE_FLOAT16_VALUE(-1675.00)}}, + {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), + SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), + SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), + SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, + {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), + SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), + SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, + {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), + SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(30416.00), SIMDE_FLOAT16_VALUE(-31296.00), + SIMDE_FLOAT16_VALUE(48448.00), SIMDE_FLOAT16_VALUE(-49024.00), + SIMDE_FLOAT16_VALUE(-16512.00), SIMDE_FLOAT16_VALUE(16464.00), + SIMDE_FLOAT16_VALUE(-4848.00), SIMDE_FLOAT16_VALUE(5128.00)}}, + {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), + SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), + SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), + SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, + {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), + SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), + SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), + SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, + {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), + 
SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-14320.00), SIMDE_FLOAT16_VALUE(13240.00), + SIMDE_FLOAT16_VALUE(7448.00), SIMDE_FLOAT16_VALUE(-6452.00), + SIMDE_FLOAT16_VALUE(12808.00), SIMDE_FLOAT16_VALUE(-13192.00), + SIMDE_FLOAT16_VALUE(6108.00), SIMDE_FLOAT16_VALUE(-7028.00)}} + }; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmlaq_rot90_lane_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - // simde_float16x8_t r = simde_vcmlaq_rot90_lane_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); + // simde_float16x8_t r = simde_vcmlaq_rot90_lane_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -464,8 +604,7 @@ test_simde_vcmlaq_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -474,69 +613,93 @@ test_simde_vcmlaq_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, - { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, - { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(751066.625000), SIMDE_FLOAT32_C(-750274.312500), SIMDE_FLOAT32_C(-194766.031250), SIMDE_FLOAT32_C(193283.203125) } }, - { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, - { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, - { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-1839.301758), SIMDE_FLOAT32_C(1952.901733), SIMDE_FLOAT32_C(90457.515625), SIMDE_FLOAT32_C(-91003.382812) } }, - { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, - { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, - { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(683951.562500), SIMDE_FLOAT32_C(-683101.437500), SIMDE_FLOAT32_C(-562675.500000), SIMDE_FLOAT32_C(561279.812500) } }, - { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, - { 
SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, - { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-111859.632812), SIMDE_FLOAT32_C(110745.617188), SIMDE_FLOAT32_C(120082.710938), SIMDE_FLOAT32_C(-120974.000000) } }, - { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, - { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, - { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(16234.529297), SIMDE_FLOAT32_C(-16147.649414), SIMDE_FLOAT32_C(-8252.643555), SIMDE_FLOAT32_C(9993.203125) } }, - { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, - { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, - { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-699819.562500), SIMDE_FLOAT32_C(699737.562500), SIMDE_FLOAT32_C(-51018.167969), SIMDE_FLOAT32_C(50066.816406) } }, - { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, - { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, - { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(77733.710938), SIMDE_FLOAT32_C(-77116.382812), SIMDE_FLOAT32_C(-5138.084473), SIMDE_FLOAT32_C(5225.284668) } }, - { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, - { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, - { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-353784.031250), SIMDE_FLOAT32_C(353405.468750), 
SIMDE_FLOAT32_C(346695.718750), SIMDE_FLOAT32_C(-346168.875000) } } - }; + {{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), + SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, + {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), + SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, + {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(751066.625000), SIMDE_FLOAT32_C(-750274.312500), + SIMDE_FLOAT32_C(-194766.031250), SIMDE_FLOAT32_C(193283.203125)}}, + {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), + SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, + {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), + SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, + {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-1839.301758), SIMDE_FLOAT32_C(1952.901733), + SIMDE_FLOAT32_C(90457.515625), SIMDE_FLOAT32_C(-91003.382812)}}, + {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), + SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, + {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), + SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, + {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, + INT32_C(0), + {SIMDE_FLOAT32_C(683951.562500), SIMDE_FLOAT32_C(-683101.437500), + SIMDE_FLOAT32_C(-562675.500000), SIMDE_FLOAT32_C(561279.812500)}}, + {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), + SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, + {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), + SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, + {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-111859.632812), SIMDE_FLOAT32_C(110745.617188), + SIMDE_FLOAT32_C(120082.710938), SIMDE_FLOAT32_C(-120974.000000)}}, + {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), + SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, + {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), + SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, + {SIMDE_FLOAT32_C(26.31), 
SIMDE_FLOAT32_C(163.74)}, + INT32_C(0), + {SIMDE_FLOAT32_C(16234.529297), SIMDE_FLOAT32_C(-16147.649414), + SIMDE_FLOAT32_C(-8252.643555), SIMDE_FLOAT32_C(9993.203125)}}, + {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), + SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, + {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), + SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, + {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-699819.562500), SIMDE_FLOAT32_C(699737.562500), + SIMDE_FLOAT32_C(-51018.167969), SIMDE_FLOAT32_C(50066.816406)}}, + {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), + SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, + {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), + SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, + {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, + INT32_C(0), + {SIMDE_FLOAT32_C(77733.710938), SIMDE_FLOAT32_C(-77116.382812), + SIMDE_FLOAT32_C(-5138.084473), SIMDE_FLOAT32_C(5225.284668)}}, + {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), + SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, + {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), + SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, + {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-353784.031250), SIMDE_FLOAT32_C(353405.468750), + SIMDE_FLOAT32_C(346695.718750), SIMDE_FLOAT32_C(-346168.875000)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot90_lane_f32(r_, a, b, 0); - // SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + // 
SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), + // simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -547,8 +710,7 @@ test_simde_vcmlaq_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float16_t r_[8]; @@ -557,103 +719,170 @@ test_simde_vcmlaq_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), - SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, - { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), - SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, 
- { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), - SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-31088.00), SIMDE_FLOAT16_VALUE( 31696.00), SIMDE_FLOAT16_VALUE(-54688.00), SIMDE_FLOAT16_VALUE( 55296.00), - SIMDE_FLOAT16_VALUE( 14808.00), SIMDE_FLOAT16_VALUE(-13808.00), SIMDE_FLOAT16_VALUE(-16400.00), SIMDE_FLOAT16_VALUE( 15776.00) } }, - { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), - SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, - { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), - SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, - { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), - SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( 6640.00), SIMDE_FLOAT16_VALUE( -5832.00), SIMDE_FLOAT16_VALUE( 48800.00), SIMDE_FLOAT16_VALUE(-49248.00), - SIMDE_FLOAT16_VALUE( -7264.00), SIMDE_FLOAT16_VALUE( 6500.00), SIMDE_FLOAT16_VALUE( 7096.00), SIMDE_FLOAT16_VALUE( -7912.00) } }, - { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), - SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, - { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), - SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), 
SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, - { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), - SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE( -3164.00), SIMDE_FLOAT16_VALUE( 2328.00), SIMDE_FLOAT16_VALUE( 9440.00), SIMDE_FLOAT16_VALUE( -9864.00), - SIMDE_FLOAT16_VALUE(-14032.00), SIMDE_FLOAT16_VALUE( 14504.00), SIMDE_FLOAT16_VALUE( -7252.00), SIMDE_FLOAT16_VALUE( 7956.00) } }, - { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), - SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, - { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), - SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, - { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), - SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE(-27744.00), SIMDE_FLOAT16_VALUE( 26544.00), SIMDE_FLOAT16_VALUE(-20944.00), SIMDE_FLOAT16_VALUE( 19696.00), - SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE( 20992.00), SIMDE_FLOAT16_VALUE( 24256.00), SIMDE_FLOAT16_VALUE(-23472.00) } }, - { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), - SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, - { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 
817.50), - SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, - { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), - SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, - INT32_C( 0), - { SIMDE_FLOAT16_VALUE(-18960.00), SIMDE_FLOAT16_VALUE( 18816.00), SIMDE_FLOAT16_VALUE( 17920.00), SIMDE_FLOAT16_VALUE(-17920.00), - SIMDE_FLOAT16_VALUE( 9928.00), SIMDE_FLOAT16_VALUE( -9320.00), SIMDE_FLOAT16_VALUE(-16072.00), SIMDE_FLOAT16_VALUE( 15032.00) } }, - { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), - SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, - { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), - SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, - { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), - SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, - INT32_C( 1), - { SIMDE_FLOAT16_VALUE( -8824.00), SIMDE_FLOAT16_VALUE( 8368.00), SIMDE_FLOAT16_VALUE( -4568.00), SIMDE_FLOAT16_VALUE( 4344.00), - SIMDE_FLOAT16_VALUE( 8800.00), SIMDE_FLOAT16_VALUE( -8672.00), SIMDE_FLOAT16_VALUE( -5792.00), SIMDE_FLOAT16_VALUE( 6468.00) } }, - { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), - SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, - { SIMDE_FLOAT16_VALUE( 908.00), 
SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), - SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, - { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), - SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, - INT32_C( 2), - { SIMDE_FLOAT16_VALUE( 18624.00), SIMDE_FLOAT16_VALUE(-19920.00), SIMDE_FLOAT16_VALUE( 8288.00), SIMDE_FLOAT16_VALUE( -9184.00), - SIMDE_FLOAT16_VALUE(-20576.00), SIMDE_FLOAT16_VALUE( 21408.00), SIMDE_FLOAT16_VALUE( 22928.00), SIMDE_FLOAT16_VALUE(-21728.00) } }, - { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), - SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, - { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), - SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, - { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), - SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, - INT32_C( 3), - { SIMDE_FLOAT16_VALUE( 32496.00), SIMDE_FLOAT16_VALUE(-31952.00), SIMDE_FLOAT16_VALUE( 20992.00), SIMDE_FLOAT16_VALUE(-20752.00), - SIMDE_FLOAT16_VALUE( 34656.00), SIMDE_FLOAT16_VALUE(-35072.00), SIMDE_FLOAT16_VALUE( 43648.00), SIMDE_FLOAT16_VALUE(-42976.00) } } - }; + {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), + SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), + SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), + SIMDE_FLOAT16_VALUE(-834.50), 
SIMDE_FLOAT16_VALUE(216.12)}, + {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), + SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), + SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), + SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, + {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), + SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), + SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), + SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-31088.00), SIMDE_FLOAT16_VALUE(31696.00), + SIMDE_FLOAT16_VALUE(-54688.00), SIMDE_FLOAT16_VALUE(55296.00), + SIMDE_FLOAT16_VALUE(14808.00), SIMDE_FLOAT16_VALUE(-13808.00), + SIMDE_FLOAT16_VALUE(-16400.00), SIMDE_FLOAT16_VALUE(15776.00)}}, + {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), + SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), + SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), + SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, + {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), + SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(79.00), SIMDE_FLOAT16_VALUE(84.00), + SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-96.00)}, + {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), + SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), + SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), + SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(6640.00), SIMDE_FLOAT16_VALUE(-5832.00), + SIMDE_FLOAT16_VALUE(48800.00), SIMDE_FLOAT16_VALUE(-49248.00), + SIMDE_FLOAT16_VALUE(-7264.00), SIMDE_FLOAT16_VALUE(6500.00), + SIMDE_FLOAT16_VALUE(7096.00), SIMDE_FLOAT16_VALUE(-7912.00)}}, + {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), + SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), + SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), + 
SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, + {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), + SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), + SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), + SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, + {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), + SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), + SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), + SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(-3164.00), SIMDE_FLOAT16_VALUE(2328.00), + SIMDE_FLOAT16_VALUE(9440.00), SIMDE_FLOAT16_VALUE(-9864.00), + SIMDE_FLOAT16_VALUE(-14032.00), SIMDE_FLOAT16_VALUE(14504.00), + SIMDE_FLOAT16_VALUE(-7252.00), SIMDE_FLOAT16_VALUE(7956.00)}}, + {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), + SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), + SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), + SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, + {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), + SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), + SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), + SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, + {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), + SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), + SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), + SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(-27744.00), SIMDE_FLOAT16_VALUE(26544.00), + SIMDE_FLOAT16_VALUE(-20944.00), SIMDE_FLOAT16_VALUE(19696.00), + SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE(20992.00), + SIMDE_FLOAT16_VALUE(24256.00), SIMDE_FLOAT16_VALUE(-23472.00)}}, + {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), + SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), + SIMDE_FLOAT16_VALUE(914.50), 
SIMDE_FLOAT16_VALUE(-312.00), + SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, + {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), + SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), + SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), + SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, + {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), + SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), + SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), + SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, + INT32_C(0), + {SIMDE_FLOAT16_VALUE(-18960.00), SIMDE_FLOAT16_VALUE(18816.00), + SIMDE_FLOAT16_VALUE(17920.00), SIMDE_FLOAT16_VALUE(-17920.00), + SIMDE_FLOAT16_VALUE(9928.00), SIMDE_FLOAT16_VALUE(-9320.00), + SIMDE_FLOAT16_VALUE(-16072.00), SIMDE_FLOAT16_VALUE(15032.00)}}, + {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), + SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), + SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), + SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, + {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), + SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), + SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), + SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, + {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), + SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), + SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), + SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, + INT32_C(1), + {SIMDE_FLOAT16_VALUE(-8824.00), SIMDE_FLOAT16_VALUE(8368.00), + SIMDE_FLOAT16_VALUE(-4568.00), SIMDE_FLOAT16_VALUE(4344.00), + SIMDE_FLOAT16_VALUE(8800.00), SIMDE_FLOAT16_VALUE(-8672.00), + SIMDE_FLOAT16_VALUE(-5792.00), SIMDE_FLOAT16_VALUE(6468.00)}}, + {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), + SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), + 
SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), + SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, + {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), + SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), + SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), + SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, + {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), + SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), + SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), + SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, + INT32_C(2), + {SIMDE_FLOAT16_VALUE(18624.00), SIMDE_FLOAT16_VALUE(-19920.00), + SIMDE_FLOAT16_VALUE(8288.00), SIMDE_FLOAT16_VALUE(-9184.00), + SIMDE_FLOAT16_VALUE(-20576.00), SIMDE_FLOAT16_VALUE(21408.00), + SIMDE_FLOAT16_VALUE(22928.00), SIMDE_FLOAT16_VALUE(-21728.00)}}, + {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), + SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), + SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), + SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, + {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), + SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), + SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), + SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, + {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), + SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), + SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), + SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, + INT32_C(3), + {SIMDE_FLOAT16_VALUE(32496.00), SIMDE_FLOAT16_VALUE(-31952.00), + SIMDE_FLOAT16_VALUE(20992.00), SIMDE_FLOAT16_VALUE(-20752.00), + SIMDE_FLOAT16_VALUE(34656.00), SIMDE_FLOAT16_VALUE(-35072.00), + SIMDE_FLOAT16_VALUE(43648.00), SIMDE_FLOAT16_VALUE(-42976.00)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - simde_float16x8_t r; - // simde_float16x8_t r = simde_vcmlaq_rot90_laneq_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); - SIMDE_CONSTIFY_4_(simde_vcmlaq_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_float16x8_t r; + // simde_float16x8_t r = simde_vcmlaq_rot90_laneq_f16(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, + // SIMDE_TEST_VEC_POS_LAST); + SIMDE_CONSTIFY_4_( + simde_vcmlaq_rot90_laneq_f16, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), + 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; + for (int i = 0; i < 8; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -664,8 +893,7 @@ test_simde_vcmlaq_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int -test_simde_vcmlaq_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { +static int test_simde_vcmlaq_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { simde_float32_t r_[4]; @@ -674,71 +902,106 @@ test_simde_vcmlaq_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, - { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, - { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-838971.375000), SIMDE_FLOAT32_C(839496.187500), SIMDE_FLOAT32_C(-782261.625000), SIMDE_FLOAT32_C(781911.312500) } }, - { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, - { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, - { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(-82460.406250), SIMDE_FLOAT32_C(81505.664062), SIMDE_FLOAT32_C(-105654.820312), SIMDE_FLOAT32_C(105385.882812) } }, - { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, - { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, - { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(-271762.062500), SIMDE_FLOAT32_C(270583.375000), SIMDE_FLOAT32_C(441852.812500), SIMDE_FLOAT32_C(-440280.406250) } }, - { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, - { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, - { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, - INT32_C( 1), - { 
SIMDE_FLOAT32_C(-734530.875000), SIMDE_FLOAT32_C(734586.937500), SIMDE_FLOAT32_C(753882.687500), SIMDE_FLOAT32_C(-754404.875000) } }, - { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, - { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, - { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(454093.875000), SIMDE_FLOAT32_C(-452557.187500), SIMDE_FLOAT32_C(-338388.906250), SIMDE_FLOAT32_C(337763.312500) } }, - { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, - { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, - { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(590198.187500), SIMDE_FLOAT32_C(-589538.625000), SIMDE_FLOAT32_C(165896.703125), SIMDE_FLOAT32_C(-167645.296875) } }, - { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, - { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, - { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, - INT32_C( 0), - { SIMDE_FLOAT32_C(242423.562500), SIMDE_FLOAT32_C(-242568.734375), SIMDE_FLOAT32_C(-26172.757812), SIMDE_FLOAT32_C(25283.837891) } }, - { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, - { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, - { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, - INT32_C( 1), - { SIMDE_FLOAT32_C(125878.625000), 
SIMDE_FLOAT32_C(-125534.804688), SIMDE_FLOAT32_C(138068.187500), SIMDE_FLOAT32_C(-138834.203125) } } - }; + {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), + SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, + {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), + SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, + {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), + SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-838971.375000), SIMDE_FLOAT32_C(839496.187500), + SIMDE_FLOAT32_C(-782261.625000), SIMDE_FLOAT32_C(781911.312500)}}, + {{SIMDE_FLOAT32_C(-57.67), SIMDE_FLOAT32_C(-897.07), + SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, + {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), + SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, + {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), + SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-82460.406250), SIMDE_FLOAT32_C(81505.664062), + SIMDE_FLOAT32_C(-105654.820312), SIMDE_FLOAT32_C(105385.882812)}}, + {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), + SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, + {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), + SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, + {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), + SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, + INT32_C(0), + {SIMDE_FLOAT32_C(-271762.062500), SIMDE_FLOAT32_C(270583.375000), + SIMDE_FLOAT32_C(441852.812500), SIMDE_FLOAT32_C(-440280.406250)}}, + {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), + SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, + {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), + SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, + {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), + SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, + INT32_C(1), + {SIMDE_FLOAT32_C(-734530.875000), SIMDE_FLOAT32_C(734586.937500), + SIMDE_FLOAT32_C(753882.687500), 
SIMDE_FLOAT32_C(-754404.875000)}}, + {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), + SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, + {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), + SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, + {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), + SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, + INT32_C(0), + {SIMDE_FLOAT32_C(454093.875000), SIMDE_FLOAT32_C(-452557.187500), + SIMDE_FLOAT32_C(-338388.906250), SIMDE_FLOAT32_C(337763.312500)}}, + {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), + SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, + {SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), + SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, + {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), + SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, + INT32_C(1), + {SIMDE_FLOAT32_C(590198.187500), SIMDE_FLOAT32_C(-589538.625000), + SIMDE_FLOAT32_C(165896.703125), SIMDE_FLOAT32_C(-167645.296875)}}, + {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), + SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, + {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), + SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, + {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), + SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, + INT32_C(0), + {SIMDE_FLOAT32_C(242423.562500), SIMDE_FLOAT32_C(-242568.734375), + SIMDE_FLOAT32_C(-26172.757812), SIMDE_FLOAT32_C(25283.837891)}}, + {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), + SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, + {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), + SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, + {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), + SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, + INT32_C(1), + {SIMDE_FLOAT32_C(125878.625000), SIMDE_FLOAT32_C(-125534.804688), + SIMDE_FLOAT32_C(138068.187500), SIMDE_FLOAT32_C(-138834.203125)}}}; - for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_( + simde_vcmlaq_rot90_laneq_f32, r, + (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), + test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - // simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), + 1); + // simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, + // test_vec[i].lane); simde_test_arm_neon_write_f32x4(2, r, + // SIMDE_TEST_VEC_POS_LAST); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - for (int i = 0 ; i < 8 ; i++) { + const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; + for (int i = 0; i < 8; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); From 318c72ba07f79f1efa856a65406f03eee72f3b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 19:51:55 +0800 Subject: [PATCH 10/29] [Fix] : add newline --- 
simde/arm/neon/cadd_rot270.h | 2 +- simde/arm/neon/cadd_rot90.h | 2 +- simde/arm/neon/cmla_lane.h | 2 +- simde/arm/neon/cmla_rot180_lane.h | 2 +- simde/arm/neon/cmla_rot270_lane.h | 2 +- simde/arm/neon/cmla_rot90_lane.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 1d0f1c833..88f7f4fbc 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -191,4 +191,4 @@ simde_float64x2_t simde_vcaddq_rot270_f64(simde_float64x2_t a, SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT270_H) */ diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 4391aa47a..5a51acc29 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -191,4 +191,4 @@ simde_float64x2_t simde_vcaddq_rot90_f64(simde_float64x2_t a, SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CADD_ROT90_H) */ diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index ae9bc9b30..471b54b2a 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -330,4 +330,4 @@ simde_float32x4_t simde_vcmlaq_laneq_f32(simde_float32x4_t r, SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_LANE_H) */ diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 1e5c198e1..cd55ea429 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -363,4 +363,4 @@ simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ \ No newline at end of file +#endif /* 
!defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) */ diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index ad82c5704..ad82dd945 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -362,4 +362,4 @@ simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) */ diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index d5f766a74..7e0c6af92 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -360,4 +360,4 @@ simde_float32x4_t simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) */ From 5866e62c7e5bedb76570823e53e97746e02e2926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 20:20:51 +0800 Subject: [PATCH 11/29] [Fix] : invalid operands to binary expression for f16 --- simde/arm/neon/cadd_rot270.h | 11 ----------- simde/arm/neon/cadd_rot90.h | 14 -------------- simde/arm/neon/cmla_lane.h | 10 ---------- simde/arm/neon/cmla_rot180_lane.h | 28 ---------------------------- simde/arm/neon/cmla_rot270_lane.h | 28 ---------------------------- simde/arm/neon/cmla_rot90_lane.h | 28 ---------------------------- 6 files changed, 119 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 88f7f4fbc..1e9837a9c 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -43,17 +43,12 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && 
!defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; } -#endif return simde_float16x4_from_private(r_); #endif @@ -74,18 +69,12 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values = b_.values + a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; } -#endif return simde_float16x8_from_private(r_); #endif diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 5a51acc29..6c4dfc5c4 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -43,18 +43,12 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; } -#endif - return simde_float16x4_from_private(r_); #endif } @@ -73,20 +67,12 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, #else 
simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values = b_.values + a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; } -#endif - return simde_float16x8_from_private(r_); #endif } diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 471b54b2a..d8cadfe36 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -51,17 +51,12 @@ simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } -#endif result = simde_float16x4_from_private(r_); return result; #endif @@ -122,17 +117,12 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } -#endif result = simde_float16x4_from_private(r_); return result; #endif 
diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index cd55ea429..8d0313eb8 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -52,18 +52,12 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } -#endif result = simde_float16x4_from_private(r_); return result; @@ -131,20 +125,12 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } -#endif result = simde_float16x8_from_private(r_); return result; @@ -212,18 +198,12 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } -#endif result = simde_float16x4_from_private(r_); return result; @@ -294,20 +274,12 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } -#endif result = simde_float16x8_from_private(r_); return result; diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index ad82dd945..fcd750b71 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -51,18 +51,12 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * 
sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } -#endif result = simde_float16x4_from_private(r_); return result; @@ -130,20 +124,12 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } -#endif result = simde_float16x8_from_private(r_); return result; @@ -211,18 +197,12 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } -#endif result = simde_float16x4_from_private(r_); return result; @@ -293,20 +273,12 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) 
&& !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } -#endif result = simde_float16x8_from_private(r_); return result; diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 7e0c6af92..be6e33e17 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -51,18 +51,12 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } -#endif result = simde_float16x4_from_private(r_); return result; #endif @@ -129,20 +123,12 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values += b_.values * a_.values; 
-#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } -#endif result = simde_float16x8_from_private(r_); return result; @@ -210,18 +196,12 @@ simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } -#endif result = simde_float16x4_from_private(r_); return result; @@ -292,20 +272,12 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } -#endif result = simde_float16x8_from_private(r_); return result; From 7a7cc0385458e1608cddd2a421dab62fbf0f13f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 20:51:47 +0800 
Subject: [PATCH 12/29] [Fix] : operation for f16 --- simde/arm/neon/cadd_rot270.h | 10 ++++--- simde/arm/neon/cadd_rot90.h | 13 +++++++--- simde/arm/neon/cmla_lane.h | 43 ++++++++++++++++--------------- simde/arm/neon/cmla_rot180_lane.h | 34 ++++++++++++++++++------ simde/arm/neon/cmla_rot270_lane.h | 34 ++++++++++++++++++------ simde/arm/neon/cmla_rot90_lane.h | 34 ++++++++++++++++++------ 6 files changed, 115 insertions(+), 53 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 1e9837a9c..c27174286 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -27,6 +27,7 @@ #if !defined(SIMDE_ARM_NEON_CADD_ROT270_H) #define SIMDE_ARM_NEON_CADD_ROT270_H +#include "add.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -46,8 +47,9 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1]); } return simde_float16x4_from_private(r_); @@ -72,8 +74,8 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + r_.values[2 * i] = (b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = (-(b_.values[2 * i]), a_.values[2 * i + 1]); } return simde_float16x8_from_private(r_); diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 6c4dfc5c4..a1514ca1c 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -27,6 
+27,7 @@ #if !defined(SIMDE_ARM_NEON_CADD_ROT90_H) #define SIMDE_ARM_NEON_CADD_ROT90_H +#include "add.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -46,8 +47,10 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + r_.values[2 * i] = + simde_vaddh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); } return simde_float16x4_from_private(r_); #endif @@ -70,8 +73,10 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + r_.values[2 * i] = + simde_vaddh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); } return simde_float16x8_from_private(r_); #endif diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index d8cadfe36..91aa94722 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -27,9 +27,10 @@ #if !defined(SIMDE_ARM_NEON_CMLA_LANE_H) #define SIMDE_ARM_NEON_CMLA_LANE_H +#include "add.h" #include "dup_lane.h" +#include "mul.h" #include "types.h" - HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ @@ -54,8 +55,11 @@ simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i] = 
simde_vaddh_f16( + r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); + r_.values[2 * i + 1] = + simde_vaddh_f16(r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); } result = simde_float16x4_from_private(r_); return result; @@ -120,8 +124,11 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); + r_.values[2 * i + 1] = + simde_vaddh_f16(r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); } result = simde_float16x4_from_private(r_); return result; @@ -186,18 +193,15 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); + r_.values[2 * i + 1] = + simde_vaddh_f16(r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); } -#endif result = simde_float16x8_from_private(r_); return result; #endif @@ -259,18 +263,15 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, 
a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - r_.values += b_.values * a_.values; -#else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); + r_.values[2 * i + 1] = + simde_vaddh_f16(r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); } -#endif result = simde_float16x8_from_private(r_); return result; #endif diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 8d0313eb8..26c61bb39 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -27,7 +27,9 @@ #if !defined(SIMDE_ARM_NEON_CMLA_ROT180_LANE_H) #define SIMDE_ARM_NEON_CMLA_ROT180_LANE_H +#include "add.h" #include "dup_lane.h" +#include "mul.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -55,8 +57,12 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + r_.values[2 * i] = + simde_vaddh_f16(r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); + r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); } result = simde_float16x4_from_private(r_); @@ -128,8 +134,12 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + r_.values[2 * i] 
= + simde_vaddh_f16(r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); + r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); } result = simde_float16x8_from_private(r_); @@ -201,8 +211,12 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + r_.values[2 * i] = + simde_vaddh_f16(r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); + r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); } result = simde_float16x4_from_private(r_); @@ -277,8 +291,12 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + r_.values[2 * i] = + simde_vaddh_f16(r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); + r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); } result = simde_float16x8_from_private(r_); diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index fcd750b71..dce2f610b 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -27,7 +27,9 @@ #if !defined(SIMDE_ARM_NEON_CMLA_ROT270_LANE_H) #define SIMDE_ARM_NEON_CMLA_ROT270_LANE_H +#include "add.h" #include "dup_lane.h" +#include "mul.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -54,8 +56,12 @@ simde_float16x4_t 
simde_vcmla_rot270_lane_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); } result = simde_float16x4_from_private(r_); @@ -127,8 +133,12 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); } result = simde_float16x8_from_private(r_); @@ -200,8 +210,12 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); } result = simde_float16x4_from_private(r_); @@ -276,8 +290,12 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { 
- r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); } result = simde_float16x8_from_private(r_); diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index be6e33e17..f6b333e58 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -27,7 +27,9 @@ #if !defined(SIMDE_ARM_NEON_CMLA_ROT90_LANE_H) #define SIMDE_ARM_NEON_CMLA_ROT90_LANE_H +#include "add.h" #include "dup_lane.h" +#include "mul.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -54,8 +56,12 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); } result = simde_float16x4_from_private(r_); return result; @@ -126,8 +132,12 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * 
i + 1], + simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); } result = simde_float16x8_from_private(r_); @@ -199,8 +209,12 @@ simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); } result = simde_float16x4_from_private(r_); @@ -275,8 +289,12 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_vaddh_f16( + r_.values[2 * i + 1], + simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); } result = simde_float16x8_from_private(r_); From 21fb644f18a87c63a40d7f9ffcb1201785df4195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 21:00:34 +0800 Subject: [PATCH 13/29] [Fix] : simde_vaddh_f16 missed --- simde/arm/neon/cadd_rot270.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index c27174286..81fec3450 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -74,8 +74,9 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) 
{ - r_.values[2 * i] = (b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = (-(b_.values[2 * i]), a_.values[2 * i + 1]); + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1]); } return simde_float16x8_from_private(r_); From bf58e4d054242554ea6d440b5ad90f30350fe6c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 21:42:26 +0800 Subject: [PATCH 14/29] [Fix] : invalid argument type 'simde_float16' to unary expression --- simde/arm/neon/cadd_rot270.h | 10 +++--- simde/arm/neon/cadd_rot90.h | 8 +++-- simde/arm/neon/cmla_rot180_lane.h | 56 ++++++++++++++++++++----------- simde/arm/neon/cmla_rot270_lane.h | 16 ++++++--- simde/arm/neon/cmla_rot90_lane.h | 16 ++++++--- 5 files changed, 72 insertions(+), 34 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 81fec3450..d3d1c0d26 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -48,8 +48,9 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1]); + r_.values[2 * i + 1] = simde_vaddh_f16( + simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1]); } return simde_float16x4_from_private(r_); @@ -75,8 +76,9 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1]); + r_.values[2 * i + 1] = simde_vaddh_f16( + 
simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1]); } return simde_float16x8_from_private(r_); diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index a1514ca1c..df409c1fc 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -48,7 +48,9 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = - simde_vaddh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i]); + simde_vaddh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i]); r_.values[2 * i + 1] = simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); } @@ -74,7 +76,9 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] = - simde_vaddh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i]); + simde_vaddh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i]); r_.values[2 * i + 1] = simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); } diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 26c61bb39..753e13b71 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -57,12 +57,16 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = - simde_vaddh_f16(r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); - r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i])); + r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - 
simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i])); } result = simde_float16x4_from_private(r_); @@ -134,12 +138,16 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = - simde_vaddh_f16(r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); - r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i])); + r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i])); } result = simde_float16x8_from_private(r_); @@ -211,12 +219,16 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = - simde_vaddh_f16(r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); - r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i])); + r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i])); } result = simde_float16x4_from_private(r_); @@ -291,12 +303,16 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, SIMDE_VECTORIZE for (size_t i = 0; i < 
(sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = - simde_vaddh_f16(r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i])); - r_.values[2 * i + 1] += = simde_vaddh_f16( + r_.values[2 * i] = simde_vaddh_f16( + r_.values[2 * i], + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i])); + r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - simde_vmulh_f16(-(b_.values[2 * i + 1]) * a_.values[2 * i])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i])); } result = simde_float16x8_from_private(r_); diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index dce2f610b..d2fc749e2 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -61,7 +61,9 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1])); } result = simde_float16x4_from_private(r_); @@ -138,7 +140,9 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1])); } result = simde_float16x8_from_private(r_); @@ -215,7 +219,9 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - 
simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1])); } result = simde_float16x4_from_private(r_); @@ -295,7 +301,9 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], - simde_vmulh_f16(-(b_.values[2 * i]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1])); } result = simde_float16x8_from_private(r_); diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index f6b333e58..ef3113a37 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -58,7 +58,9 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, i++) { r_.values[2 * i] = simde_vaddh_f16( r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); @@ -134,7 +136,9 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, i++) { r_.values[2 * i] = simde_vaddh_f16( r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); @@ -211,7 +215,9 @@ simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, i++) { r_.values[2 * i] = simde_vaddh_f16( r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + 
simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); @@ -291,7 +297,9 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, i++) { r_.values[2 * i] = simde_vaddh_f16( r_.values[2 * i], - simde_vmulh_f16(-(b_.values[2 * i + 1]), a_.values[2 * i + 1])); + simde_vmulh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i + 1])); r_.values[2 * i + 1] = simde_vaddh_f16( r_.values[2 * i + 1], simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); From 856b4a754dcdbd488f2f2f571b2507981040d2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 21:54:59 +0800 Subject: [PATCH 15/29] [Fix] : not using static const struct for f16 --- test/arm/neon/cadd_rot270.c | 450 +++++++++++++------------------ test/arm/neon/cadd_rot90.c | 4 +- test/arm/neon/cmla_lane.c | 8 +- test/arm/neon/cmla_rot180_lane.c | 8 +- test/arm/neon/cmla_rot270_lane.c | 8 +- test/arm/neon/cmla_rot90_lane.c | 8 +- 6 files changed, 200 insertions(+), 286 deletions(-) diff --git a/test/arm/neon/cadd_rot270.c b/test/arm/neon/cadd_rot270.c index f73d0cd45..937a99c77 100644 --- a/test/arm/neon/cadd_rot270.c +++ b/test/arm/neon/cadd_rot270.c @@ -1,77 +1,55 @@ #define SIMDE_TEST_ARM_NEON_INSN cadd_rot270 -#include "../../../simde/arm/neon/cadd_rot270.h" - #include "test-neon.h" +#include "../../../simde/arm/neon/cadd_rot270.h" -static int test_simde_vcadd_rot270_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcadd_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), - SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, - 
{SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), - SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, - {SIMDE_FLOAT16_VALUE(1086.00), SIMDE_FLOAT16_VALUE(962.00), - SIMDE_FLOAT16_VALUE(-922.00), SIMDE_FLOAT16_VALUE(429.00)}}, - {{SIMDE_FLOAT16_VALUE(-659.50), SIMDE_FLOAT16_VALUE(924.50), - SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00)}, - {SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - {SIMDE_FLOAT16_VALUE(-556.50), SIMDE_FLOAT16_VALUE(194.50), - SIMDE_FLOAT16_VALUE(1382.00), SIMDE_FLOAT16_VALUE(-375.75)}}, - {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - {SIMDE_FLOAT16_VALUE(454.50), SIMDE_FLOAT16_VALUE(-107.75), - SIMDE_FLOAT16_VALUE(-67.25), SIMDE_FLOAT16_VALUE(607.00)}}, - {{SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(-582.50), - SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25)}, - {SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), - SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, - {SIMDE_FLOAT16_VALUE(-158.50), SIMDE_FLOAT16_VALUE(-1496.00), - SIMDE_FLOAT16_VALUE(-545.00), SIMDE_FLOAT16_VALUE(778.50)}}, - {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), - SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, - {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), - SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, - {SIMDE_FLOAT16_VALUE(172.25), SIMDE_FLOAT16_VALUE(-743.00), - SIMDE_FLOAT16_VALUE(-490.75), SIMDE_FLOAT16_VALUE(971.00)}}, - {{SIMDE_FLOAT16_VALUE(498.50), SIMDE_FLOAT16_VALUE(205.75), - SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50)}, - {SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), - SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, - 
{SIMDE_FLOAT16_VALUE(190.50), SIMDE_FLOAT16_VALUE(1041.00), - SIMDE_FLOAT16_VALUE(-1044.00), SIMDE_FLOAT16_VALUE(1416.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-1177.00), SIMDE_FLOAT16_VALUE(-573.00), - SIMDE_FLOAT16_VALUE(1188.00), SIMDE_FLOAT16_VALUE(-813.00)}}, - {{SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, - {SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, - {SIMDE_FLOAT16_VALUE(22.50), SIMDE_FLOAT16_VALUE(1161.00), - SIMDE_FLOAT16_VALUE(577.50), SIMDE_FLOAT16_VALUE(249.25)}}}; + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 1086.00), SIMDE_FLOAT16_VALUE( 962.00), SIMDE_FLOAT16_VALUE( -922.00), SIMDE_FLOAT16_VALUE( 429.00) } }, + { { SIMDE_FLOAT16_VALUE( -659.50), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00) }, + { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( -556.50), SIMDE_FLOAT16_VALUE( 194.50), SIMDE_FLOAT16_VALUE( 1382.00), SIMDE_FLOAT16_VALUE( -375.75) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( 454.50), SIMDE_FLOAT16_VALUE( -107.75), SIMDE_FLOAT16_VALUE( -67.25), 
SIMDE_FLOAT16_VALUE( 607.00) } }, + { { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( -582.50), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25) }, + { SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + { SIMDE_FLOAT16_VALUE( -158.50), SIMDE_FLOAT16_VALUE( -1496.00), SIMDE_FLOAT16_VALUE( -545.00), SIMDE_FLOAT16_VALUE( 778.50) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( 172.25), SIMDE_FLOAT16_VALUE( -743.00), SIMDE_FLOAT16_VALUE( -490.75), SIMDE_FLOAT16_VALUE( 971.00) } }, + { { SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50) }, + { SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + { SIMDE_FLOAT16_VALUE( 190.50), SIMDE_FLOAT16_VALUE( 1041.00), SIMDE_FLOAT16_VALUE( -1044.00), SIMDE_FLOAT16_VALUE( 1416.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -1177.00), SIMDE_FLOAT16_VALUE( -573.00), SIMDE_FLOAT16_VALUE( 1188.00), SIMDE_FLOAT16_VALUE( -813.00) } }, + { { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + { SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + { SIMDE_FLOAT16_VALUE( 22.50), SIMDE_FLOAT16_VALUE( 1161.00), SIMDE_FLOAT16_VALUE( 577.50), SIMDE_FLOAT16_VALUE( 249.25) } } + }; - for (size_t i = 
0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r = simde_vcadd_rot270_f16(a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } + return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcadd_rot270_f16(a, b); @@ -84,122 +62,76 @@ static int test_simde_vcadd_rot270_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcaddq_rot270_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcaddq_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), - SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75), - SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, - {SIMDE_FLOAT16_VALUE(-936.50), SIMDE_FLOAT16_VALUE(-465.00), - SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), - SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), - SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, - {SIMDE_FLOAT16_VALUE(75.000000), SIMDE_FLOAT16_VALUE(9.000000), - SIMDE_FLOAT16_VALUE(-330.000000), SIMDE_FLOAT16_VALUE(444.750000), - SIMDE_FLOAT16_VALUE(-379.500000), SIMDE_FLOAT16_VALUE(537.000000), - SIMDE_FLOAT16_VALUE(-2.000000), SIMDE_FLOAT16_VALUE(98.500000)}}, - {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), - SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00), - SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), - 
SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, - {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(-666.00), - SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), - SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), - SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, - {SIMDE_FLOAT16_VALUE(-414.750000), SIMDE_FLOAT16_VALUE(-60.000000), - SIMDE_FLOAT16_VALUE(320.250000), SIMDE_FLOAT16_VALUE(-1288.000000), - SIMDE_FLOAT16_VALUE(1193.000000), SIMDE_FLOAT16_VALUE(268.750000), - SIMDE_FLOAT16_VALUE(991.000000), SIMDE_FLOAT16_VALUE(-564.500000)}}, - {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), - SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50), - SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), - SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, - {SIMDE_FLOAT16_VALUE(-111.25), SIMDE_FLOAT16_VALUE(-830.50), - SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), - SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), - SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, - {SIMDE_FLOAT16_VALUE(-747.500000), SIMDE_FLOAT16_VALUE(529.000000), - SIMDE_FLOAT16_VALUE(95.000000), SIMDE_FLOAT16_VALUE(771.000000), - SIMDE_FLOAT16_VALUE(-1456.000000), SIMDE_FLOAT16_VALUE(309.500000), - SIMDE_FLOAT16_VALUE(-1582.000000), SIMDE_FLOAT16_VALUE(238.750000)}}, - {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), - SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50), - SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), - SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, - {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-677.50), - SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), - SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), - SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, - {SIMDE_FLOAT16_VALUE(-29.000000), SIMDE_FLOAT16_VALUE(53.750000), - SIMDE_FLOAT16_VALUE(427.250000), SIMDE_FLOAT16_VALUE(-891.000000), - 
SIMDE_FLOAT16_VALUE(-270.750000), SIMDE_FLOAT16_VALUE(5.875000), - SIMDE_FLOAT16_VALUE(1056.000000), SIMDE_FLOAT16_VALUE(430.250000)}}, - {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), - SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), - SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, - {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), - SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), - SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), - SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, - {SIMDE_FLOAT16_VALUE(-438.500000), SIMDE_FLOAT16_VALUE(-711.500000), - SIMDE_FLOAT16_VALUE(372.500000), SIMDE_FLOAT16_VALUE(1038.000000), - SIMDE_FLOAT16_VALUE(-1028.000000), SIMDE_FLOAT16_VALUE(-369.250000), - SIMDE_FLOAT16_VALUE(-848.000000), SIMDE_FLOAT16_VALUE(900.000000)}}, - {{SIMDE_FLOAT16_VALUE(-378.00), SIMDE_FLOAT16_VALUE(-695.50), - SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), - SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, - {SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25), - SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-640.00), - SIMDE_FLOAT16_VALUE(-552.00), SIMDE_FLOAT16_VALUE(75.88)}, - {SIMDE_FLOAT16_VALUE(-1352.000000), SIMDE_FLOAT16_VALUE(91.000000), - SIMDE_FLOAT16_VALUE(490.750000), SIMDE_FLOAT16_VALUE(492.000000), - SIMDE_FLOAT16_VALUE(-1510.000000), SIMDE_FLOAT16_VALUE(-758.000000), - SIMDE_FLOAT16_VALUE(533.000000), SIMDE_FLOAT16_VALUE(1131.000000)}}, - {{SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), - SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(943.50), - SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(395.50), - SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, - {SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), - 
SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), - SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), - SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, - {SIMDE_FLOAT16_VALUE(192.625000), SIMDE_FLOAT16_VALUE(1297.000000), - SIMDE_FLOAT16_VALUE(-367.500000), SIMDE_FLOAT16_VALUE(1320.000000), - SIMDE_FLOAT16_VALUE(-315.000000), SIMDE_FLOAT16_VALUE(-435.500000), - SIMDE_FLOAT16_VALUE(791.000000), SIMDE_FLOAT16_VALUE(253.250000)}}, - {{SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), - SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), - SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), - SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, - {SIMDE_FLOAT16_VALUE(274.50), SIMDE_FLOAT16_VALUE(192.38), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25), - SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), - SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50)}, - {SIMDE_FLOAT16_VALUE(728.000000), SIMDE_FLOAT16_VALUE(347.000000), - SIMDE_FLOAT16_VALUE(1081.000000), SIMDE_FLOAT16_VALUE(-248.500000), - SIMDE_FLOAT16_VALUE(-645.500000), SIMDE_FLOAT16_VALUE(298.750000), - SIMDE_FLOAT16_VALUE(75.500000), SIMDE_FLOAT16_VALUE(845.000000)}}}; + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75), + SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -936.50), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + { SIMDE_FLOAT16_VALUE( 75.00 ), SIMDE_FLOAT16_VALUE( 9.00), SIMDE_FLOAT16_VALUE(-330.00), SIMDE_FLOAT16_VALUE(444.75), + SIMDE_FLOAT16_VALUE( -379.50), SIMDE_FLOAT16_VALUE( 537.00 ), SIMDE_FLOAT16_VALUE(-2.00), SIMDE_FLOAT16_VALUE(98.50) } }, + { { 
SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00), + SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( -666.00), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + { SIMDE_FLOAT16_VALUE(-414.75), SIMDE_FLOAT16_VALUE(-60.00), SIMDE_FLOAT16_VALUE(320.25), SIMDE_FLOAT16_VALUE(-1288.00), + SIMDE_FLOAT16_VALUE(1193.00), SIMDE_FLOAT16_VALUE(268.75), SIMDE_FLOAT16_VALUE(991.00), SIMDE_FLOAT16_VALUE(-564.50) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50), + SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -111.25), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + { SIMDE_FLOAT16_VALUE(-747.50), SIMDE_FLOAT16_VALUE(529.00), SIMDE_FLOAT16_VALUE(95.00), SIMDE_FLOAT16_VALUE(771.000000), + SIMDE_FLOAT16_VALUE(-1456.00), SIMDE_FLOAT16_VALUE(309.50), SIMDE_FLOAT16_VALUE(-1582.00), SIMDE_FLOAT16_VALUE(238.750000) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50), + SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -677.50), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) 
}, + { SIMDE_FLOAT16_VALUE(-29.00), SIMDE_FLOAT16_VALUE(53.75), SIMDE_FLOAT16_VALUE(427.25), SIMDE_FLOAT16_VALUE(-891.000000), + SIMDE_FLOAT16_VALUE(-270.75), SIMDE_FLOAT16_VALUE( 5.88), SIMDE_FLOAT16_VALUE(1056.00), SIMDE_FLOAT16_VALUE(430.250000) } }, + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE(-438.50), SIMDE_FLOAT16_VALUE(-711.500000), SIMDE_FLOAT16_VALUE(372.500000), SIMDE_FLOAT16_VALUE(1038.000000), + SIMDE_FLOAT16_VALUE(-1028.00), SIMDE_FLOAT16_VALUE(-369.250000), SIMDE_FLOAT16_VALUE(-848.000000), SIMDE_FLOAT16_VALUE(900.000000) } }, + { { SIMDE_FLOAT16_VALUE( -378.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), + SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25), + SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88) }, + { SIMDE_FLOAT16_VALUE(-1352.00), SIMDE_FLOAT16_VALUE(91.00), SIMDE_FLOAT16_VALUE(490.750000), SIMDE_FLOAT16_VALUE(492.000000), + SIMDE_FLOAT16_VALUE(-1510.00), SIMDE_FLOAT16_VALUE(-758.00), SIMDE_FLOAT16_VALUE(533.000000), SIMDE_FLOAT16_VALUE(1131.000000) } }, + { { SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50), + SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 395.50), 
SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE(192.63), SIMDE_FLOAT16_VALUE(1297.00), SIMDE_FLOAT16_VALUE(-367.500000), SIMDE_FLOAT16_VALUE(1320.000000), + SIMDE_FLOAT16_VALUE(-315.00), SIMDE_FLOAT16_VALUE(-435.50), SIMDE_FLOAT16_VALUE(791.000000), SIMDE_FLOAT16_VALUE(253.250000) } }, + { { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 274.50), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25), + SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50) }, + { SIMDE_FLOAT16_VALUE(728.00), SIMDE_FLOAT16_VALUE(347.00), SIMDE_FLOAT16_VALUE(1081.000000), SIMDE_FLOAT16_VALUE(-248.500000), + SIMDE_FLOAT16_VALUE(-645.50), SIMDE_FLOAT16_VALUE(298.75), SIMDE_FLOAT16_VALUE(75.500000), SIMDE_FLOAT16_VALUE(845.000000) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } + return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 
1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); @@ -212,50 +144,53 @@ static int test_simde_vcaddq_rot270_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcadd_rot270_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcadd_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(863.66), SIMDE_FLOAT32_C(828.31)}, - {SIMDE_FLOAT32_C(-563.51), SIMDE_FLOAT32_C(-576.51)}, - {SIMDE_FLOAT32_C(287.149963), SIMDE_FLOAT32_C(1391.820068)}}, - {{SIMDE_FLOAT32_C(-703.45), SIMDE_FLOAT32_C(383.90)}, - {SIMDE_FLOAT32_C(-772.46), SIMDE_FLOAT32_C(457.40)}, - {SIMDE_FLOAT32_C(-246.050018), SIMDE_FLOAT32_C(1156.359985)}}, - {{SIMDE_FLOAT32_C(295.99), SIMDE_FLOAT32_C(653.10)}, - {SIMDE_FLOAT32_C(-120.98), SIMDE_FLOAT32_C(945.50)}, - {SIMDE_FLOAT32_C(1241.489990), SIMDE_FLOAT32_C(774.079956)}}, - {{SIMDE_FLOAT32_C(-280.81), SIMDE_FLOAT32_C(631.32)}, - {SIMDE_FLOAT32_C(688.34), SIMDE_FLOAT32_C(191.95)}, - {SIMDE_FLOAT32_C(-88.860001), SIMDE_FLOAT32_C(-57.020020)}}, - {{SIMDE_FLOAT32_C(-522.88), SIMDE_FLOAT32_C(-323.79)}, - {SIMDE_FLOAT32_C(-887.99), SIMDE_FLOAT32_C(-283.70)}, - {SIMDE_FLOAT32_C(-806.580017), SIMDE_FLOAT32_C(564.199951)}}, - {{SIMDE_FLOAT32_C(-117.76), SIMDE_FLOAT32_C(-841.45)}, - {SIMDE_FLOAT32_C(664.94), SIMDE_FLOAT32_C(-987.19)}, - {SIMDE_FLOAT32_C(-1104.949951), SIMDE_FLOAT32_C(-1506.390015)}}, - {{SIMDE_FLOAT32_C(-642.89), SIMDE_FLOAT32_C(-152.10)}, - {SIMDE_FLOAT32_C(963.83), SIMDE_FLOAT32_C(919.89)}, - {SIMDE_FLOAT32_C(277.000000), SIMDE_FLOAT32_C(-1115.930054)}}, - {{SIMDE_FLOAT32_C(630.40), SIMDE_FLOAT32_C(-669.33)}, - {SIMDE_FLOAT32_C(671.13), SIMDE_FLOAT32_C(256.93)}, - {SIMDE_FLOAT32_C(887.330017), SIMDE_FLOAT32_C(-1340.459961)}}}; + { { SIMDE_FLOAT32_C( 863.66), SIMDE_FLOAT32_C( 828.31) }, + { SIMDE_FLOAT32_C( -563.51), SIMDE_FLOAT32_C( -576.51) }, + 
{ SIMDE_FLOAT32_C(287.149963), SIMDE_FLOAT32_C(1391.820068) } }, + { { SIMDE_FLOAT32_C( -703.45), SIMDE_FLOAT32_C( 383.90) }, + { SIMDE_FLOAT32_C( -772.46), SIMDE_FLOAT32_C( 457.40) }, + { SIMDE_FLOAT32_C(-246.050018), SIMDE_FLOAT32_C(1156.359985) } }, + { { SIMDE_FLOAT32_C( 295.99), SIMDE_FLOAT32_C( 653.10) }, + { SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( 945.50) }, + { SIMDE_FLOAT32_C(1241.489990), SIMDE_FLOAT32_C(774.079956) } }, + { { SIMDE_FLOAT32_C( -280.81), SIMDE_FLOAT32_C( 631.32) }, + { SIMDE_FLOAT32_C( 688.34), SIMDE_FLOAT32_C( 191.95) }, + { SIMDE_FLOAT32_C(-88.860001), SIMDE_FLOAT32_C(-57.020020) } }, + { { SIMDE_FLOAT32_C( -522.88), SIMDE_FLOAT32_C( -323.79) }, + { SIMDE_FLOAT32_C( -887.99), SIMDE_FLOAT32_C( -283.70) }, + { SIMDE_FLOAT32_C(-806.580017), SIMDE_FLOAT32_C(564.199951) } }, + { { SIMDE_FLOAT32_C( -117.76), SIMDE_FLOAT32_C( -841.45) }, + { SIMDE_FLOAT32_C( 664.94), SIMDE_FLOAT32_C( -987.19) }, + { SIMDE_FLOAT32_C(-1104.949951), SIMDE_FLOAT32_C(-1506.390015) } }, + { { SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( -152.10) }, + { SIMDE_FLOAT32_C( 963.83), SIMDE_FLOAT32_C( 919.89) }, + { SIMDE_FLOAT32_C(277.000000), SIMDE_FLOAT32_C(-1115.930054) } }, + { { SIMDE_FLOAT32_C( 630.40), SIMDE_FLOAT32_C( -669.33) }, + { SIMDE_FLOAT32_C( 671.13), SIMDE_FLOAT32_C( 256.93) }, + { SIMDE_FLOAT32_C(887.330017), SIMDE_FLOAT32_C(-1340.459961) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcadd_rot270_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - + + return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = 
simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcadd_rot270_f32(a, b); @@ -268,74 +203,51 @@ static int test_simde_vcadd_rot270_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcaddq_rot270_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcaddq_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(-337.31), SIMDE_FLOAT32_C(-857.36), - SIMDE_FLOAT32_C(334.71), SIMDE_FLOAT32_C(-617.33)}, - {SIMDE_FLOAT32_C(-439.38), SIMDE_FLOAT32_C(245.13), - SIMDE_FLOAT32_C(111.06), SIMDE_FLOAT32_C(520.69)}, - {SIMDE_FLOAT32_C(-92.179993), SIMDE_FLOAT32_C(-417.979980), - SIMDE_FLOAT32_C(855.400024), SIMDE_FLOAT32_C(-728.390015)}}, - {{SIMDE_FLOAT32_C(85.49), SIMDE_FLOAT32_C(250.19), - SIMDE_FLOAT32_C(-679.96), SIMDE_FLOAT32_C(-750.25)}, - {SIMDE_FLOAT32_C(-138.26), SIMDE_FLOAT32_C(-14.62), - SIMDE_FLOAT32_C(-921.52), SIMDE_FLOAT32_C(225.91)}, - {SIMDE_FLOAT32_C(70.869995), SIMDE_FLOAT32_C(388.450012), - SIMDE_FLOAT32_C(-454.050018), SIMDE_FLOAT32_C(171.270020)}}, - {{SIMDE_FLOAT32_C(242.83), SIMDE_FLOAT32_C(869.28), - SIMDE_FLOAT32_C(297.95), SIMDE_FLOAT32_C(105.66)}, - {SIMDE_FLOAT32_C(-722.51), SIMDE_FLOAT32_C(-802.37), - SIMDE_FLOAT32_C(-245.78), SIMDE_FLOAT32_C(915.39)}, - {SIMDE_FLOAT32_C(-559.539978), SIMDE_FLOAT32_C(1591.790039), - SIMDE_FLOAT32_C(1213.340088), SIMDE_FLOAT32_C(351.440002)}}, - {{SIMDE_FLOAT32_C(54.20), SIMDE_FLOAT32_C(-928.06), - SIMDE_FLOAT32_C(362.39), SIMDE_FLOAT32_C(-936.63)}, - {SIMDE_FLOAT32_C(185.82), SIMDE_FLOAT32_C(-244.43), - SIMDE_FLOAT32_C(924.66), SIMDE_FLOAT32_C(-643.82)}, - {SIMDE_FLOAT32_C(-190.229996), SIMDE_FLOAT32_C(-1113.880005), - SIMDE_FLOAT32_C(-281.429993), SIMDE_FLOAT32_C(-1861.290039)}}, - {{SIMDE_FLOAT32_C(-516.92), SIMDE_FLOAT32_C(-615.16), - SIMDE_FLOAT32_C(-751.52), SIMDE_FLOAT32_C(-974.04)}, - {SIMDE_FLOAT32_C(-144.42), SIMDE_FLOAT32_C(338.27), - SIMDE_FLOAT32_C(704.92), 
SIMDE_FLOAT32_C(116.90)}, - {SIMDE_FLOAT32_C(-178.649994), SIMDE_FLOAT32_C(-470.739990), - SIMDE_FLOAT32_C(-634.619995), SIMDE_FLOAT32_C(-1678.959961)}}, - {{SIMDE_FLOAT32_C(49.39), SIMDE_FLOAT32_C(-363.00), - SIMDE_FLOAT32_C(-476.30), SIMDE_FLOAT32_C(106.71)}, - {SIMDE_FLOAT32_C(-725.84), SIMDE_FLOAT32_C(-353.71), - SIMDE_FLOAT32_C(268.41), SIMDE_FLOAT32_C(728.83)}, - {SIMDE_FLOAT32_C(-304.320007), SIMDE_FLOAT32_C(362.840027), - SIMDE_FLOAT32_C(252.530029), SIMDE_FLOAT32_C(-161.700012)}}, - {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87), - SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, - {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), - SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, - {SIMDE_FLOAT32_C(150.020020), SIMDE_FLOAT32_C(697.539978), - SIMDE_FLOAT32_C(-255.500000), SIMDE_FLOAT32_C(-386.080017)}}, - {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94), - SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, - {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), - SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, - {SIMDE_FLOAT32_C(-309.889984), SIMDE_FLOAT32_C(146.700012), - SIMDE_FLOAT32_C(303.960022), SIMDE_FLOAT32_C(-1102.250000)}}}; + { { SIMDE_FLOAT32_C( -337.31), SIMDE_FLOAT32_C( -857.36), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( -617.33) }, + { SIMDE_FLOAT32_C( -439.38), SIMDE_FLOAT32_C( 245.13), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( 520.69) }, + { SIMDE_FLOAT32_C(-92.179993), SIMDE_FLOAT32_C(-417.979980), SIMDE_FLOAT32_C(855.400024), SIMDE_FLOAT32_C(-728.390015) } }, + { { SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 250.19), SIMDE_FLOAT32_C( -679.96), SIMDE_FLOAT32_C( -750.25) }, + { SIMDE_FLOAT32_C( -138.26), SIMDE_FLOAT32_C( -14.62), SIMDE_FLOAT32_C( -921.52), SIMDE_FLOAT32_C( 225.91) }, + { SIMDE_FLOAT32_C(70.869995), SIMDE_FLOAT32_C(388.450012), SIMDE_FLOAT32_C(-454.050018), SIMDE_FLOAT32_C(171.270020) } }, + { { SIMDE_FLOAT32_C( 242.83), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 297.95), SIMDE_FLOAT32_C( 105.66) }, + { 
SIMDE_FLOAT32_C( -722.51), SIMDE_FLOAT32_C( -802.37), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( 915.39) }, + { SIMDE_FLOAT32_C(-559.539978), SIMDE_FLOAT32_C(1591.790039), SIMDE_FLOAT32_C(1213.340088), SIMDE_FLOAT32_C(351.440002) } }, + { { SIMDE_FLOAT32_C( 54.20), SIMDE_FLOAT32_C( -928.06), SIMDE_FLOAT32_C( 362.39), SIMDE_FLOAT32_C( -936.63) }, + { SIMDE_FLOAT32_C( 185.82), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 924.66), SIMDE_FLOAT32_C( -643.82) }, + { SIMDE_FLOAT32_C(-190.229996), SIMDE_FLOAT32_C(-1113.880005), SIMDE_FLOAT32_C(-281.429993), SIMDE_FLOAT32_C(-1861.290039) } }, + { { SIMDE_FLOAT32_C( -516.92), SIMDE_FLOAT32_C( -615.16), SIMDE_FLOAT32_C( -751.52), SIMDE_FLOAT32_C( -974.04) }, + { SIMDE_FLOAT32_C( -144.42), SIMDE_FLOAT32_C( 338.27), SIMDE_FLOAT32_C( 704.92), SIMDE_FLOAT32_C( 116.90) }, + { SIMDE_FLOAT32_C(-178.649994), SIMDE_FLOAT32_C(-470.739990), SIMDE_FLOAT32_C(-634.619995), SIMDE_FLOAT32_C(-1678.959961) } }, + { { SIMDE_FLOAT32_C( 49.39), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -476.30), SIMDE_FLOAT32_C( 106.71) }, + { SIMDE_FLOAT32_C( -725.84), SIMDE_FLOAT32_C( -353.71), SIMDE_FLOAT32_C( 268.41), SIMDE_FLOAT32_C( 728.83) }, + { SIMDE_FLOAT32_C(-304.320007), SIMDE_FLOAT32_C(362.840027), SIMDE_FLOAT32_C(252.530029), SIMDE_FLOAT32_C(-161.700012) } }, + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87), SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + { SIMDE_FLOAT32_C(150.020020), SIMDE_FLOAT32_C(697.539978), SIMDE_FLOAT32_C(-255.500000), SIMDE_FLOAT32_C(-386.080017) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + { SIMDE_FLOAT32_C(-309.889984), SIMDE_FLOAT32_C(146.700012), SIMDE_FLOAT32_C(303.960022), SIMDE_FLOAT32_C(-1102.250000) } 
} + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); @@ -348,50 +260,52 @@ static int test_simde_vcaddq_rot270_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcaddq_rot270_f64(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcaddq_rot270_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { - {{SIMDE_FLOAT64_C(-30.36), SIMDE_FLOAT64_C(631.53)}, - {SIMDE_FLOAT64_C(850.75), SIMDE_FLOAT64_C(-263.55)}, - {SIMDE_FLOAT64_C(-293.910000), SIMDE_FLOAT64_C(-219.220000)}}, - {{SIMDE_FLOAT64_C(139.96), SIMDE_FLOAT64_C(859.14)}, - {SIMDE_FLOAT64_C(-834.47), SIMDE_FLOAT64_C(216.10)}, - {SIMDE_FLOAT64_C(356.060000), SIMDE_FLOAT64_C(1693.610000)}}, - {{SIMDE_FLOAT64_C(995.86), SIMDE_FLOAT64_C(529.74)}, - {SIMDE_FLOAT64_C(79.08), SIMDE_FLOAT64_C(947.13)}, - {SIMDE_FLOAT64_C(1942.990000), SIMDE_FLOAT64_C(450.660000)}}, - {{SIMDE_FLOAT64_C(122.02), SIMDE_FLOAT64_C(-250.00)}, - {SIMDE_FLOAT64_C(-361.82), SIMDE_FLOAT64_C(265.24)}, - {SIMDE_FLOAT64_C(387.260000), SIMDE_FLOAT64_C(111.820000)}}, - {{SIMDE_FLOAT64_C(275.71), SIMDE_FLOAT64_C(2.71)}, - {SIMDE_FLOAT64_C(99.79), SIMDE_FLOAT64_C(-137.67)}, - {SIMDE_FLOAT64_C(138.040000), SIMDE_FLOAT64_C(-97.080000)}}, - 
{{SIMDE_FLOAT64_C(-761.19), SIMDE_FLOAT64_C(813.19)}, - {SIMDE_FLOAT64_C(-897.68), SIMDE_FLOAT64_C(653.58)}, - {SIMDE_FLOAT64_C(-107.610000), SIMDE_FLOAT64_C(1710.870000)}}, - {{SIMDE_FLOAT64_C(396.02), SIMDE_FLOAT64_C(413.06)}, - {SIMDE_FLOAT64_C(514.09), SIMDE_FLOAT64_C(-977.67)}, - {SIMDE_FLOAT64_C(-581.650000), SIMDE_FLOAT64_C(-101.030000)}}, - {{SIMDE_FLOAT64_C(-671.79), SIMDE_FLOAT64_C(-92.13)}, - {SIMDE_FLOAT64_C(-441.32), SIMDE_FLOAT64_C(-374.27)}, - {SIMDE_FLOAT64_C(-1046.060000), SIMDE_FLOAT64_C(349.190000)}}}; + { { SIMDE_FLOAT64_C( -30.36), SIMDE_FLOAT64_C( 631.53) }, + { SIMDE_FLOAT64_C( 850.75), SIMDE_FLOAT64_C( -263.55) }, + { SIMDE_FLOAT64_C(-293.910000), SIMDE_FLOAT64_C(-219.220000) } }, + { { SIMDE_FLOAT64_C( 139.96), SIMDE_FLOAT64_C( 859.14) }, + { SIMDE_FLOAT64_C( -834.47), SIMDE_FLOAT64_C( 216.10) }, + { SIMDE_FLOAT64_C(356.060000), SIMDE_FLOAT64_C(1693.610000) } }, + { { SIMDE_FLOAT64_C( 995.86), SIMDE_FLOAT64_C( 529.74) }, + { SIMDE_FLOAT64_C( 79.08), SIMDE_FLOAT64_C( 947.13) }, + { SIMDE_FLOAT64_C(1942.990000), SIMDE_FLOAT64_C(450.660000) } }, + { { SIMDE_FLOAT64_C( 122.02), SIMDE_FLOAT64_C( -250.00) }, + { SIMDE_FLOAT64_C( -361.82), SIMDE_FLOAT64_C( 265.24) }, + { SIMDE_FLOAT64_C(387.260000), SIMDE_FLOAT64_C(111.820000) } }, + { { SIMDE_FLOAT64_C( 275.71), SIMDE_FLOAT64_C( 2.71) }, + { SIMDE_FLOAT64_C( 99.79), SIMDE_FLOAT64_C( -137.67) }, + { SIMDE_FLOAT64_C(138.040000), SIMDE_FLOAT64_C(-97.080000) } }, + { { SIMDE_FLOAT64_C( -761.19), SIMDE_FLOAT64_C( 813.19) }, + { SIMDE_FLOAT64_C( -897.68), SIMDE_FLOAT64_C( 653.58) }, + { SIMDE_FLOAT64_C(-107.610000), SIMDE_FLOAT64_C(1710.870000) } }, + { { SIMDE_FLOAT64_C( 396.02), SIMDE_FLOAT64_C( 413.06) }, + { SIMDE_FLOAT64_C( 514.09), SIMDE_FLOAT64_C( -977.67) }, + { SIMDE_FLOAT64_C(-581.650000), SIMDE_FLOAT64_C(-101.030000) } }, + { { SIMDE_FLOAT64_C( -671.79), SIMDE_FLOAT64_C( -92.13) }, + { SIMDE_FLOAT64_C( -441.32), SIMDE_FLOAT64_C( -374.27) }, + { SIMDE_FLOAT64_C(-1046.060000), 
SIMDE_FLOAT64_C(349.190000) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vcaddq_rot270_f64(a, b); - simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } + return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t r = simde_vcaddq_rot270_f64(a, b); @@ -412,4 +326,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f64) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" +#include "test-neon-footer.h" \ No newline at end of file diff --git a/test/arm/neon/cadd_rot90.c b/test/arm/neon/cadd_rot90.c index 9097bf10a..31890d427 100644 --- a/test/arm/neon/cadd_rot90.c +++ b/test/arm/neon/cadd_rot90.c @@ -6,7 +6,7 @@ static int test_simde_vcadd_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t r[4]; @@ -86,7 +86,7 @@ static int test_simde_vcadd_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcaddq_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t r[8]; diff --git a/test/arm/neon/cmla_lane.c b/test/arm/neon/cmla_lane.c index 41e4af538..24b6e0314 100644 --- a/test/arm/neon/cmla_lane.c +++ b/test/arm/neon/cmla_lane.c @@ -7,7 +7,7 @@ static int test_simde_vcmla_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t 
b[4]; @@ -205,7 +205,7 @@ static int test_simde_vcmla_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmla_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[8]; @@ -429,7 +429,7 @@ static int test_simde_vcmla_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[4]; @@ -701,7 +701,7 @@ static int test_simde_vcmlaq_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[8]; diff --git a/test/arm/neon/cmla_rot180_lane.c b/test/arm/neon/cmla_rot180_lane.c index 1553af309..d727aeefd 100644 --- a/test/arm/neon/cmla_rot180_lane.c +++ b/test/arm/neon/cmla_rot180_lane.c @@ -7,7 +7,7 @@ static int test_simde_vcmla_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[4]; @@ -209,7 +209,7 @@ static int test_simde_vcmla_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmla_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[8]; @@ -438,7 +438,7 @@ static int test_simde_vcmla_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[4]; @@ -712,7 +712,7 @@ static int test_simde_vcmlaq_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[8]; diff --git 
a/test/arm/neon/cmla_rot270_lane.c b/test/arm/neon/cmla_rot270_lane.c index 9242fff16..5cf034919 100644 --- a/test/arm/neon/cmla_rot270_lane.c +++ b/test/arm/neon/cmla_rot270_lane.c @@ -7,7 +7,7 @@ static int test_simde_vcmla_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[4]; @@ -203,7 +203,7 @@ static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmla_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[8]; @@ -425,7 +425,7 @@ static int test_simde_vcmla_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[4]; @@ -695,7 +695,7 @@ static int test_simde_vcmlaq_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[8]; diff --git a/test/arm/neon/cmla_rot90_lane.c b/test/arm/neon/cmla_rot90_lane.c index 37812cf17..f6dc1eda5 100644 --- a/test/arm/neon/cmla_rot90_lane.c +++ b/test/arm/neon/cmla_rot90_lane.c @@ -7,7 +7,7 @@ static int test_simde_vcmla_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[4]; @@ -208,7 +208,7 @@ static int test_simde_vcmla_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmla_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[4]; simde_float16_t a[4]; simde_float16_t b[8]; @@ -436,7 +436,7 @@ static int test_simde_vcmla_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static 
const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[4]; @@ -712,7 +712,7 @@ static int test_simde_vcmlaq_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcmlaq_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #if 1 - static const struct { + struct { simde_float16_t r_[8]; simde_float16_t a[8]; simde_float16_t b[8]; From 9f6cace72efe04b70bb23a29f53fc862878361d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 23:10:22 +0800 Subject: [PATCH 16/29] [Fix] : f16 intrinsic of cadd_rot270 and cadd_rot90 --- simde/arm/neon/cadd_rot270.h | 47 +++++++++++++++++++------------- simde/arm/neon/cadd_rot90.h | 53 ++++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 40 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index d3d1c0d26..486442fe3 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -43,16 +43,20 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, #else simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16( - simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1]); - } - + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = 
simde_vaddh_f16( + simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1]); + } + #endif return simde_float16x4_from_private(r_); #endif } @@ -72,15 +76,20 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16( - simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1]); - } - + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = simde_vaddh_f16( + simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1]); + } + #endif return simde_float16x8_from_private(r_); #endif } diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index df409c1fc..2016c8865 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -43,17 +43,22 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, #else simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i]); - r_.values[2 * i + 1] = - 
simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = + simde_vaddh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); + } + #endif return simde_float16x4_from_private(r_); #endif } @@ -72,16 +77,22 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, #else simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = + simde_vaddh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); + } + #endif return simde_float16x8_from_private(r_); #endif } From dfc4481ca3bfef342257126c3b66db26bb189b5a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 23:11:22 +0800 Subject: [PATCH 17/29] [Fix] : f16 intrinsics --- simde/arm/neon/cmla_lane.h | 84 ++++++++++--------- simde/arm/neon/cmla_rot180_lane.h | 112 ++++++++++++------------- simde/arm/neon/cmla_rot270_lane.h | 100 ++++++++++++----------- simde/arm/neon/cmla_rot90_lane.h | 130 ++++++++++++++++-------------- 4 files changed, 226 insertions(+), 200 deletions(-) diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 91aa94722..83e93d613 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -52,15 +52,18 @@ simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - r_.values[2 * i + 1] = - simde_vaddh_f16(r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x4_from_private(r_); return result; #endif @@ -121,15 +124,18 @@ simde_float16x4_t 
simde_vcmla_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - r_.values[2 * i + 1] = - simde_vaddh_f16(r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x4_from_private(r_); return result; #endif @@ -193,15 +199,18 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - r_.values[2 * i + 1] = - simde_vaddh_f16(r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, 
a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x8_from_private(r_); return result; #endif @@ -263,15 +272,18 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - r_.values[2 * i + 1] = - simde_vaddh_f16(r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[lane], a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x8_from_private(r_); return result; 
#endif diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 753e13b71..876f1f530 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -54,20 +54,20 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x4_from_private(r_); return result; @@ -135,20 +135,20 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < 
(sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) \ + && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x8_from_private(r_); return result; @@ -216,20 +216,20 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - 
-simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) \ + && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x4_from_private(r_); return result; @@ -300,20 +300,20 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) \ + && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, 
b_.values, 0, 1, 2, 3, 4, 5, 6, 7); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); + } + #endif result = simde_float16x8_from_private(r_); return result; diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index d2fc749e2..05b1a7653 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -53,18 +53,19 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + 
simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif result = simde_float16x4_from_private(r_); return result; @@ -132,18 +133,19 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif result = simde_float16x8_from_private(r_); return result; @@ -211,18 
+213,19 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif result = simde_float16x4_from_private(r_); return result; @@ -293,18 +296,19 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(b_.values[2 * i + 1], a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - 
r_.values[2 * i + 1], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1])); - } + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif result = simde_float16x8_from_private(r_); return result; diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index ef3113a37..466cd91f3 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -31,6 +31,7 @@ #include "dup_lane.h" #include "mul.h" #include "types.h" +#include "cvt.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ @@ -48,24 +49,24 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, SIMDE_CONSTIFY_2_(vcmla_rot90_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); #else - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane])); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = 
simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); - } - result = simde_float16x4_from_private(r_); + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + result = simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); return result; #endif } @@ -130,20 +131,23 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); - } - + #if defined(SIMDE_SHUFFLE_VECTOR_) && 
!defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (simde_float16_to_float32(b_.values[2 * i]) * simde_float16_to_float32(a_.values[2 * i + 1]))); + } + #endif result = simde_float16x8_from_private(r_); return result; #endif @@ -209,20 +213,23 @@ simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); - } - + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for 
(size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (simde_float16_to_float32(b_.values[2 * i]) * simde_float16_to_float32(a_.values[2 * i + 1]))); + } + #endif result = simde_float16x4_from_private(r_); return result; #endif @@ -291,20 +298,23 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16( - r_.values[2 * i], - simde_vmulh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_vaddh_f16( - r_.values[2 * i + 1], - simde_vmulh_f16(b_.values[2 * i], a_.values[2 * i + 1])); - } - + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + 
(simde_float16_to_float32(b_.values[2 * i]) * simde_float16_to_float32(a_.values[2 * i + 1]))); + } + #endif result = simde_float16x8_from_private(r_); return result; #endif From d2621800d9118b438d6c3df40c037ed2f7adec81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 23:13:10 +0800 Subject: [PATCH 18/29] [Fix] : format the code --- simde/arm/neon/cadd_rot270.h | 59 ++++++------ simde/arm/neon/cadd_rot90.h | 67 +++++++------- simde/arm/neon/cmla_lane.h | 126 ++++++++++++++++---------- simde/arm/neon/cmla_rot180_lane.h | 140 +++++++++++++++++------------ simde/arm/neon/cmla_rot270_lane.h | 136 +++++++++++++++++----------- simde/arm/neon/cmla_rot90_lane.h | 143 +++++++++++++++++------------- 6 files changed, 392 insertions(+), 279 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 486442fe3..d22785c92 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -43,20 +43,21 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, #else simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16( - simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1]); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = simde_vaddh_f16( + simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1]); + } +#endif return simde_float16x4_from_private(r_); #endif } @@ -76,20 +77,22 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16( - simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1]); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = simde_vaddh_f16( + simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), + a_.values[2 * i + 1]); + } +#endif return simde_float16x8_from_private(r_); #endif } diff --git 
a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 2016c8865..2f172a860 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -43,22 +43,23 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, #else simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = + simde_vaddh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); + } +#endif return simde_float16x4_from_private(r_); #endif } @@ -77,22 +78,24 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, #else simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, 
-b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values = b_.values + a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values = b_.values + a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = + simde_vaddh_f16(simde_float16_from_float32( + -simde_float16_to_float32(b_.values[2 * i + 1])), + a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); + } +#endif return simde_float16x8_from_private(r_); #endif } diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 83e93d613..c6c81b77d 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -52,18 +52,25 @@ simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * 
simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x4_from_private(r_); return result; #endif @@ -124,18 +131,25 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * 
simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x4_from_private(r_); return result; #endif @@ -199,18 +213,26 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + 
a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x8_from_private(r_); return result; #endif @@ -272,18 +294,26 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + simde_float16_to_float32(b_.values[lane]) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / 
(2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x8_from_private(r_); return result; #endif diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 876f1f530..a8af95764 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -54,20 +54,26 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 
4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x4_from_private(r_); return result; @@ -135,20 +141,28 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) \ - && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 
0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x8_from_private(r_); return result; @@ -216,20 +230,26 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) \ - && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x4_from_private(r_); return result; @@ -300,20 +320,28 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) \ - && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i + 1])) * simde_float16_to_float32(a_.values[2 * i])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + 
(SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } +#endif result = simde_float16x8_from_private(r_); return result; diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 05b1a7653..3d3cd730c 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -53,19 +53,26 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i 
+ 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } +#endif result = simde_float16x4_from_private(r_); return result; @@ -133,19 +140,28 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = 
simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } +#endif result = simde_float16x8_from_private(r_); return result; @@ -213,19 +229,26 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * 
simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } +#endif result = simde_float16x4_from_private(r_); return result; @@ -296,19 +319,28 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { - r_.values[2 * i] = simde_float16_from_float32( - 
simde_float16_to_float32(r_.values[2 * i]) + simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + (-simde_float16_to_float32(b_.values[2 * i])) * simde_float16_to_float32(a_.values[2 * i + 1])); - } - #endif +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } +#endif result = simde_float16x8_from_private(r_); return result; diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 466cd91f3..e146661c6 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -28,10 +28,10 @@ #define SIMDE_ARM_NEON_CMLA_ROT90_LANE_H #include "add.h" +#include "cvt.h" #include "dup_lane.h" #include "mul.h" #include "types.h" -#include "cvt.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ @@ -49,23 +49,27 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, SIMDE_CONSTIFY_2_(vcmla_rot90_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); #else - 
simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } - #endif + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } +#endif result = simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); return result; #endif @@ -131,23 +135,28 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, a_ = 
simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( + (-simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * simde_float16_to_float32(a_.values[2 * i + 1]))); - } - #endif + (simde_float16_to_float32(b_.values[2 * i]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + } +#endif result = simde_float16x8_from_private(r_); return result; #endif @@ -213,23 +222,26 @@ simde_float16x4_t 
simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( + (-simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * simde_float16_to_float32(a_.values[2 * i + 1]))); - } - #endif + (simde_float16_to_float32(b_.values[2 * i]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + } +#endif result = simde_float16x4_from_private(r_); return result; #endif @@ -298,23 +310,28 @@ simde_float16x8_t 
simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); - r_.values += b_.values * a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values += b_.values * a_.values; +#else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( + (-simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * simde_float16_to_float32(a_.values[2 * i + 1]))); - } - #endif + (simde_float16_to_float32(b_.values[2 * i]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + } +#endif result = simde_float16x8_from_private(r_); return result; #endif From 0880daca14f4059c0be79f93fb7eb6b86c24186f Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Mon, 16 Oct 2023 23:51:36 +0800 Subject: [PATCH 19/29] [Fix] : add newline in test/arm/neon/cadd_rot270.c --- test/arm/neon/cadd_rot270.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/arm/neon/cadd_rot270.c b/test/arm/neon/cadd_rot270.c index 937a99c77..b3262e138 100644 --- a/test/arm/neon/cadd_rot270.c +++ b/test/arm/neon/cadd_rot270.c @@ -326,4 +326,4 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcaddq_rot270_f64) SIMDE_TEST_FUNC_LIST_END -#include "test-neon-footer.h" \ No newline at end of file +#include "test-neon-footer.h" From d3fab5bf9a88f5d435cc34f894ae32ce7806923f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 10:44:06 +0800 Subject: [PATCH 20/29] [Fix] : remove comment for test code --- test/arm/neon/cmla_lane.c | 7 ------- test/arm/neon/cmla_rot180_lane.c | 24 ------------------------ test/arm/neon/cmla_rot270_lane.c | 4 ---- test/arm/neon/cmla_rot90_lane.c | 24 ------------------------ 4 files changed, 59 deletions(-) diff --git a/test/arm/neon/cmla_lane.c b/test/arm/neon/cmla_lane.c index 24b6e0314..0530e8322 100644 --- a/test/arm/neon/cmla_lane.c +++ b/test/arm/neon/cmla_lane.c @@ -180,7 +180,6 @@ static int test_simde_vcmla_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -404,7 +403,6 @@ static int test_simde_vcmla_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -676,7 +674,6 @@ static int 
test_simde_vcmlaq_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -854,13 +851,10 @@ static int test_simde_vcmlaq_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r; - // = simde_vcmlaq_laneq_f16(r_, a, b, test_vec[i].lane); - // simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); SIMDE_CONSTIFY_4_( simde_vcmlaq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - // write_f16x8(r); simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } @@ -976,7 +970,6 @@ static int test_simde_vcmlaq_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { simde_vcmlaq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } diff --git a/test/arm/neon/cmla_rot180_lane.c b/test/arm/neon/cmla_rot180_lane.c index d727aeefd..fcf3b86eb 100644 --- a/test/arm/neon/cmla_rot180_lane.c +++ b/test/arm/neon/cmla_rot180_lane.c @@ -100,9 +100,6 @@ static int test_simde_vcmla_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - // simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x4(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -181,10 +178,6 @@ static int test_simde_vcmla_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, 0); 
simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_float32x2_t r; - // SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f32, r, (HEDLEY_UNREACHABLE(), - // simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -310,9 +303,6 @@ static int test_simde_vcmla_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - // simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x4(2, r, - // SIMDE_TEST_VEC_POS_LAST); simde_float16x4_t r; SIMDE_CONSTIFY_2_( @@ -411,9 +401,6 @@ static int test_simde_vcmla_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f32x2(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -581,9 +568,6 @@ static int test_simde_vcmlaq_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - // simde_float16x8_t r = simde_vcmlaq_rot180_lane_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -687,7 +671,6 @@ static int test_simde_vcmlaq_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x4_t r = simde_vcmlaq_rot180_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -861,9 +844,6 @@ static int test_simde_vcmlaq_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - // simde_float16x8_t r = 
simde_vcmlaq_rot180_laneq_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, - // SIMDE_TEST_VEC_POS_LAST); simde_float16x8_t r; SIMDE_CONSTIFY_4_( @@ -987,10 +967,6 @@ static int test_simde_vcmlaq_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - - // simde_float32x4_t r = simde_vcmlaq_rot180_laneq_f32(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f32x4(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; diff --git a/test/arm/neon/cmla_rot270_lane.c b/test/arm/neon/cmla_rot270_lane.c index 5cf034919..488c523f6 100644 --- a/test/arm/neon/cmla_rot270_lane.c +++ b/test/arm/neon/cmla_rot270_lane.c @@ -178,7 +178,6 @@ static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x2_t r = simde_vcmla_rot270_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -400,7 +399,6 @@ static int test_simde_vcmla_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -670,7 +668,6 @@ static int test_simde_vcmlaq_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x4_t r = simde_vcmlaq_rot270_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -966,7 +963,6 @@ static int test_simde_vcmlaq_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; diff --git 
a/test/arm/neon/cmla_rot90_lane.c b/test/arm/neon/cmla_rot90_lane.c index f6dc1eda5..8daaef44b 100644 --- a/test/arm/neon/cmla_rot90_lane.c +++ b/test/arm/neon/cmla_rot90_lane.c @@ -98,11 +98,8 @@ static int test_simde_vcmla_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { simde_vcmla_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - // simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - // simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -180,9 +177,6 @@ static int test_simde_vcmla_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, 0); - // SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), - // simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - // simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } @@ -309,9 +303,6 @@ static int test_simde_vcmla_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x4_t r; - // simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x4(2, r, - // SIMDE_TEST_VEC_POS_LAST); SIMDE_CONSTIFY_2_( simde_vcmla_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), @@ -409,9 +400,6 @@ static int test_simde_vcmla_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); - // simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, - // test_vec[i].lane); 
simde_test_arm_neon_write_f32x2(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -579,9 +567,6 @@ static int test_simde_vcmlaq_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); - // simde_float16x8_t r = simde_vcmlaq_rot90_lane_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; @@ -683,9 +668,6 @@ static int test_simde_vcmlaq_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot90_lane_f32(r_, a, b, 0); - // SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f32, r, (HEDLEY_UNREACHABLE(), - // simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - // simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } @@ -861,9 +843,6 @@ static int test_simde_vcmlaq_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r; - // simde_float16x8_t r = simde_vcmlaq_rot90_laneq_f16(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f16x8(2, r, - // SIMDE_TEST_VEC_POS_LAST); SIMDE_CONSTIFY_4_( simde_vcmlaq_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), @@ -987,9 +966,6 @@ static int test_simde_vcmlaq_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); - // simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, - // test_vec[i].lane); simde_test_arm_neon_write_f32x4(2, r, - // SIMDE_TEST_VEC_POS_LAST); } return 0; From e42ff320eeb834ea5566daf21c92d55f345464f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 10:44:50 +0800 Subject: [PATCH 21/29] 
[Fix] : coding style --- simde/arm/neon/cadd_rot270.h | 28 +- simde/arm/neon/cadd_rot90.h | 32 +- simde/arm/neon/cmla_lane.h | 340 ++++++++++----------- simde/arm/neon/cmla_rot180_lane.h | 144 ++++----- simde/arm/neon/cmla_rot270_lane.h | 144 ++++----- simde/arm/neon/cmla_rot90_lane.h | 472 ++++++++++++++---------------- 6 files changed, 526 insertions(+), 634 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index d22785c92..5b1fad11c 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -36,15 +36,15 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcadd_rot270_f16(a, b); #else simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_.values = b_.values + a_.values; @@ -69,16 +69,16 @@ simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcaddq_rot270_f16(a, b); #else simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); @@ -104,8 +104,8 @@ simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcadd_rot270_f32(simde_float32x2_t a, simde_float32x2_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcadd_rot270_f32(a, b); #else @@ -135,8 +135,8 @@ simde_float32x2_t simde_vcadd_rot270_f32(simde_float32x2_t a, SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vcaddq_rot270_f32(simde_float32x4_t a, simde_float32x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcaddq_rot270_f32(a, b); #else @@ -166,8 +166,8 @@ simde_float32x4_t simde_vcaddq_rot270_f32(simde_float32x4_t a, 
SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vcaddq_rot270_f64(simde_float64x2_t a, simde_float64x2_t b) { -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcaddq_rot270_f64(a, b); #else diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 2f172a860..5d6607e11 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -36,15 +36,15 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcadd_rot90_f16(a, b); #else simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values = b_.values + a_.values; @@ -64,22 +64,22 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcadd_rot90_f16 -#define vcadd_rot90_f16(a, b) simde_vcadd_rot90_f16(a, b) + #undef vcadd_rot90_f16 + #define vcadd_rot90_f16(a, b) 
simde_vcadd_rot90_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, simde_float16x8_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcaddq_rot90_f16(a, b); #else simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); @@ -107,8 +107,8 @@ simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcadd_rot90_f32(simde_float32x2_t a, simde_float32x2_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcadd_rot90_f32(a, b); #else @@ -138,8 +138,8 @@ simde_float32x2_t simde_vcadd_rot90_f32(simde_float32x2_t a, SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vcaddq_rot90_f32(simde_float32x4_t a, simde_float32x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 
3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcaddq_rot90_f32(a, b); #else @@ -169,8 +169,8 @@ simde_float32x4_t simde_vcaddq_rot90_f32(simde_float32x4_t a, SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vcaddq_rot90_f64(simde_float64x2_t a, simde_float64x2_t b) { -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcaddq_rot90_f64(a, b); #else diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index c6c81b77d..7537aacf1 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -39,53 +39,49 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_lane_f16, result, (HEDLEY_UNREACHABLE(), result), - lane, r, a, b); - -#else simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE 
- for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif - result = simde_float16x4_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif + return simde_float16x4_from_private(r_); + } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_lane_f16 #define vcmla_lane_f16(r, a, b, lane) simde_vcmla_lane_f16(r, a, b, lane) #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_lane_f16(r, a, b, lane) vcmla_lane_f16(r, a, b, lane) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcmla_lane_f32(r, a, b, 0); #else @@ -119,44 +115,40 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), - lane, r, a, b); -#else simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif - result = simde_float16x4_from_private(r_); 
- return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif + return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_laneq_f16 -#define vcmla_laneq_f16(r, a, b, lane) simde_vcmla_laneq_f16(r, a, b, lane) + #undef vcmla_laneq_f16 + #define vcmla_laneq_f16(r, a, b, lane) simde_vcmla_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_laneq_f16(r, a, b, lane) vcmla_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -164,36 +156,32 @@ simde_float32x2_t simde_vcmla_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), - lane, r, a, b); -#else 
simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( simde_float32x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } -#endif - result = simde_float32x2_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif + return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_laneq_f32 -#define vcmla_laneq_f32(r, a, b, lane) simde_vcmla_laneq_f32(r, a, b, lane) + #undef vcmla_laneq_f32 + #define vcmla_laneq_f32(r, a, b, lane) simde_vcmla_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_laneq_f32(r, a, b, lane) vcmla_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -201,45 +189,42 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_lane_f16, result, (HEDLEY_UNREACHABLE(), result), - lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif - result = simde_float16x8_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + 
simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif + return simde_float16x8_from_private(r_); + } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_lane_f16 -#define vcmlaq_lane_f16(r, a, b, lane) simde_vcmlaq_lane_f16(r, a, b, lane) + #undef vcmlaq_lane_f16 + #define vcmlaq_lane_f16(r, a, b, lane) simde_vcmlaq_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_lane_f16(r, a, b, lane) vcmlaq_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -247,34 +232,31 @@ simde_float32x4_t simde_vcmlaq_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmlaq_lane_f32(r, a, b, 0); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x2_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } -#endif - + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < 
(sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif return simde_float32x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_lane_f32 -#define vcmlaq_lane_f32(r, a, b, lane) simde_vcmlaq_lane_f32(r, a, b, lane) + #undef vcmlaq_lane_f32 + #define vcmlaq_lane_f32(r, a, b, lane) simde_vcmlaq_lane_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_lane_f32(r, a, b, lane) vcmlaq_lane_f32(r, a, b, 0); #endif SIMDE_FUNCTION_ATTRIBUTES @@ -282,45 +264,41 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_4_(vcmlaq_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), - lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = 
simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif - result = simde_float16x8_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + simde_float16_to_float32(b_.values[lane]) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif + return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_laneq_f16 -#define vcmlaq_laneq_f16(r, a, b, lane) simde_vcmlaq_laneq_f16(r, a, b, lane) + #undef vcmlaq_laneq_f16 + #define vcmlaq_laneq_f16(r, a, b, lane) simde_vcmlaq_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_laneq_f16(r, a, b, lane) vcmlaq_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -328,13 +306,6 @@ simde_float32x4_t simde_vcmlaq_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, 
simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), - lane, r, a, b); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( @@ -351,13 +322,16 @@ simde_float32x4_t simde_vcmlaq_laneq_f32(simde_float32x4_t r, r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } #endif - result = simde_float32x4_from_private(r_); - return result; -#endif + return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_laneq_f32 -#define vcmlaq_laneq_f32(r, a, b, lane) simde_vcmlaq_laneq_f32(r, a, b, lane) + #undef vcmlaq_laneq_f32 + #define vcmlaq_laneq_f32(r, a, b, lane) simde_vcmlaq_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_laneq_f32(r, a, b, lane) vcmlaq_laneq_f32(r, a, b, lane) #endif SIMDE_END_DECLS_ diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index a8af95764..ae3d0e586 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -36,14 +36,13 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x4_t b, - const int lane) +simde_float16x4_t +simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x4_t b, const int 
lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmla_rot180_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -54,8 +53,8 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); @@ -74,25 +73,22 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16_to_float32(a_.values[2 * i])); } #endif - - result = simde_float16x4_from_private(r_); - return result; + return simde_float16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_lane_f16 -#define vcmla_rot180_lane_f16(r, a, b, lane) \ +#define vcmla_rot180_lane_f16(r, a, b, lane) \ simde_vcmla_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot180_lane_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x2_t b, - const int lane) +simde_float32x2_t +simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcmla_rot180_lane_f32(r, a, b, 0); #else @@ -119,19 +115,18 @@ simde_float32x2_t simde_vcmla_rot180_lane_f32(simde_float32x2_t r, } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_lane_f32 -#define vcmla_rot180_lane_f32(r, a, b, lane) \ +#define vcmla_rot180_lane_f32(r, a, b, lane) \ simde_vcmla_rot180_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x4_t b, - const int lane) +simde_float16x8_t +simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmlaq_rot180_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -141,8 +136,8 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == 
SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); @@ -163,25 +158,22 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16_to_float32(a_.values[2 * i])); } #endif - - result = simde_float16x8_from_private(r_); - return result; + return simde_float16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_lane_f16 -#define vcmlaq_rot180_lane_f16(r, a, b, lane) \ +#define vcmlaq_rot180_lane_f16(r, a, b, lane) \ simde_vcmlaq_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x2_t b, - const int lane) +simde_float32x4_t +simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcmlaq_rot180_lane_f32(r, a, b, 0); #else @@ -208,19 +200,18 @@ simde_float32x4_t simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_lane_f32 -#define vcmlaq_rot180_lane_f32(r, a, b, lane) \ +#define vcmlaq_rot180_lane_f32(r, a, b, lane) \ simde_vcmlaq_rot180_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x8_t b, - const int lane) +simde_float16x4_t +simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x8_t b, const int lane) 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -230,8 +221,8 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); @@ -250,26 +241,23 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16_to_float32(a_.values[2 * i])); } #endif - - result = simde_float16x4_from_private(r_); - return result; + return simde_float16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_laneq_f16 -#define vcmla_rot180_laneq_f16(r, a, b, lane) \ +#define vcmla_rot180_laneq_f16(r, a, b, lane) \ simde_vcmla_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x4_t b, - const int lane) +simde_float32x2_t +simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x4_t b, const int lane) 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -291,26 +279,23 @@ simde_float32x2_t simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } #endif - - result = simde_float32x2_from_private(r_); - return result; + return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_laneq_f32 -#define vcmla_rot180_laneq_f32(r, a, b, lane) \ +#define vcmla_rot180_laneq_f32(r, a, b, lane) \ simde_vcmla_rot180_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x8_t b, - const int lane) +simde_float16x8_t +simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_4_(vcmlaq_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -320,8 +305,8 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, b_ = 
simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); @@ -342,26 +327,23 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16_to_float32(a_.values[2 * i])); } #endif - - result = simde_float16x8_from_private(r_); - return result; + return simde_float16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_laneq_f16 -#define vcmlaq_rot180_laneq_f16(r, a, b, lane) \ +#define vcmlaq_rot180_laneq_f16(r, a, b, lane) \ simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x4_t b, - const int lane) +simde_float32x4_t +simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmlaq_rot180_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -383,14 +365,12 @@ simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } #endif - - result = 
simde_float32x4_from_private(r_); - return result; + return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_laneq_f32 -#define vcmlaq_rot180_laneq_f32(r, a, b, lane) \ +#define vcmlaq_rot180_laneq_f32(r, a, b, lane) \ simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) #endif diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 3d3cd730c..0967bcc9f 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -36,14 +36,13 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x4_t b, - const int lane) +simde_float16x4_t +simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmla_rot270_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -53,8 +52,8 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 
3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); @@ -73,25 +72,22 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16_to_float32(a_.values[2 * i + 1])); } #endif - - result = simde_float16x4_from_private(r_); - return result; + return simde_float16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_lane_f16 -#define vcmla_rot270_lane_f16(r, a, b, lane) \ +#define vcmla_rot270_lane_f16(r, a, b, lane) \ simde_vcmla_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot270_lane_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x2_t b, - const int lane) +simde_float32x2_t +simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcmla_rot270_lane_f32(r, a, b, 0); #else @@ -118,19 +114,18 @@ simde_float32x2_t simde_vcmla_rot270_lane_f32(simde_float32x2_t r, } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_lane_f32 -#define vcmla_rot270_lane_f32(r, a, b, lane) \ +#define vcmla_rot270_lane_f32(r, a, b, lane) \ simde_vcmla_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x4_t b, - const int lane) +simde_float16x8_t +simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x8_t 
result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmlaq_rot270_lane_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -140,8 +135,8 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); @@ -162,25 +157,22 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16_to_float32(a_.values[2 * i + 1])); } #endif - - result = simde_float16x8_from_private(r_); - return result; + return simde_float16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_lane_f16 -#define vcmlaq_rot270_lane_f16(r, a, b, lane) \ +#define vcmlaq_rot270_lane_f16(r, a, b, lane) \ simde_vcmlaq_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x2_t b, - const int lane) +simde_float32x4_t +simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) 
|| HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) return vcmlaq_rot270_lane_f32(r, a, b, 0); #else @@ -207,19 +199,18 @@ simde_float32x4_t simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_lane_f32 -#define vcmlaq_rot270_lane_f32(r, a, b, lane) \ +#define vcmlaq_rot270_lane_f32(r, a, b, lane) \ simde_vcmlaq_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x8_t b, - const int lane) +simde_float16x4_t +simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f16, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -229,8 +220,8 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, 
a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); @@ -249,26 +240,23 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16_to_float32(a_.values[2 * i + 1])); } #endif - - result = simde_float16x4_from_private(r_); - return result; + return simde_float16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_laneq_f16 -#define vcmla_rot270_laneq_f16(r, a, b, lane) \ +#define vcmla_rot270_laneq_f16(r, a, b, lane) \ simde_vcmla_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x4_t b, - const int lane) +simde_float32x2_t +simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -290,26 +278,23 @@ simde_float32x2_t simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } #endif - - result = simde_float32x2_from_private(r_); - return result; + return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_laneq_f32 -#define vcmla_rot270_laneq_f32(r, a, b, lane) \ +#define vcmla_rot270_laneq_f32(r, a, b, lane) \ simde_vcmla_rot270_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES 
-simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x8_t b, - const int lane) +simde_float16x8_t +simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_4_(vcmlaq_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -319,8 +304,8 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ +#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); @@ -341,26 +326,23 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16_to_float32(a_.values[2 * i + 1])); } #endif - - result = simde_float16x8_from_private(r_); - return result; + return simde_float16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_laneq_f16 -#define vcmlaq_rot270_laneq_f16(r, a, b, lane) \ +#define vcmlaq_rot270_laneq_f16(r, a, b, lane) \ simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, - 
simde_float32x4_t a, - simde_float32x4_t b, - const int lane) +simde_float32x4_t +simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) SIMDE_CONSTIFY_2_(vcmlaq_rot270_laneq_f32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); @@ -382,14 +364,12 @@ simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } #endif - - result = simde_float32x4_from_private(r_); - return result; + return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_laneq_f32 -#define vcmlaq_rot270_laneq_f32(r, a, b, lane) \ +#define vcmlaq_rot270_laneq_f32(r, a, b, lane) \ simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) #endif diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index e146661c6..c625b132e 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -37,18 +37,10 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x4_t b, - const int lane) +simde_float16x4_t +simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot90_lane_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), a_ = @@ -56,332 +48,298 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane]))); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } -#endif - result = simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef 
vcmla_rot90_lane_f16 -#define vcmla_rot90_lane_f16(r, a, b, lane) \ - simde_vcmla_rot90_lane_f16(r, a, b, lane) + #undef vcmla_rot90_lane_f16 + #define vcmla_rot90_lane_f16(r, a, b, lane) simde_vcmla_rot90_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot90_lane_f16(r, a, b, lane) vcmla_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot90_lane_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x2_t b, - const int lane) +simde_float32x2_t +simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmla_rot90_lane_f32(r, a, b, 0); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( simde_float32x2_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 
1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif return simde_float32x2_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot90_lane_f32 -#define vcmla_rot90_lane_f32(r, a, b, lane) \ - simde_vcmla_rot90_lane_f32(r, a, b, lane) + #undef vcmla_rot90_lane_f32 + #define vcmla_rot90_lane_f32(r, a, b, lane) simde_vcmla_rot90_lane_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot90_lane_f32(r, a, b, lane) vcmla_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x4_t b, - const int lane) +simde_float16x8_t +simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot90_lane_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && 
\ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - } -#endif - result = simde_float16x8_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (simde_float16_to_float32(b_.values[2 * i]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + } + #endif + return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot90_lane_f16 
-#define vcmlaq_rot90_lane_f16(r, a, b, lane) \ - simde_vcmlaq_rot90_lane_f16(r, a, b, lane) + #undef vcmlaq_rot90_lane_f16 + #define vcmlaq_rot90_lane_f16(r, a, b, lane) simde_vcmlaq_rot90_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot90_lane_f16(r, a, b, lane) vcmlaq_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x2_t b, - const int lane) +simde_float32x4_t +simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmlaq_rot90_lane_f32(r, a, b, 0); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x2_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } -#endif - + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, 
a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif return simde_float32x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot90_lane_f32 -#define vcmlaq_rot90_lane_f32(r, a, b, lane) \ - simde_vcmlaq_rot90_lane_f32(r, a, b, lane) + #undef vcmlaq_rot90_lane_f32 + #define vcmlaq_rot90_lane_f32(r, a, b, lane) simde_vcmlaq_rot90_lane_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot90_lane_f32(r, a, b, lane) vcmlaq_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x8_t b, - const int lane) +simde_float16x4_t +simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else + simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) 
&& !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - } -#endif - result = simde_float16x4_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (simde_float16_to_float32(b_.values[2 * i]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + } + #endif + return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot90_laneq_f16 -#define 
vcmla_rot90_laneq_f16(r, a, b, lane) \ - simde_vcmla_rot90_laneq_f16(r, a, b, lane) + #undef vcmla_rot90_laneq_f16 + #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot90_laneq_f16(r, a, b, lane) vcmla_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x4_t b, - const int lane) +simde_float32x2_t +simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot90_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( simde_float32x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } -#endif - - result = simde_float32x2_from_private(r_); - return result; -#endif + 
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot90_laneq_f32 -#define vcmla_rot90_laneq_f32(r, a, b, lane) \ - simde_vcmla_rot90_laneq_f32(r, a, b, lane) + #undef vcmla_rot90_laneq_f32 + #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot90_laneq_f32(r, a, b, lane) vcmla_rot90_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x8_t b, - const int lane) +simde_float16x8_t +simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_4_(vcmlaq_rot90_laneq_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else + simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = 
simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - } -#endif - result = simde_float16x8_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (simde_float16_to_float32(b_.values[2 * i]) * + simde_float16_to_float32(a_.values[2 * i + 1]))); + } + #endif + return 
simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot90_laneq_f16 -#define vcmlaq_rot90_laneq_f16(r, a, b, lane) \ - simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) + #undef vcmlaq_rot90_laneq_f16 + #define vcmlaq_rot90_laneq_f16(r, a, b, lane) simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) vcmlaq_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x4_t b, - const int lane) +simde_float32x4_t +simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot90_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 
1]; - r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; - } -#endif - result = simde_float32x4_from_private(r_); - return result; -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + return simde_float32x4_from_private(r_); + } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot90_laneq_f32 -#define vcmlaq_rot90_laneq_f32(r, a, b, lane) \ - simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) + #undef vcmlaq_rot90_laneq_f32 + #define vcmlaq_rot90_laneq_f32(r, a, b, lane) simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) vcmlaq_rot90_laneq_f32(r, a, b, lane) #endif - SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP From 609c46ffe50724f4b867d3793e2becb37941f82e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 12:09:16 +0800 Subject: [PATCH 22/29] [Fix] : warning of unused variable --- simde/arm/neon/cmla_rot180_lane.h | 380 ++++++++++++++--------------- simde/arm/neon/cmla_rot270_lane.h | 387 ++++++++++++++---------------- 2 files changed, 349 insertions(+), 418 deletions(-) diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index ae3d0e586..7782c1fa5 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ 
b/simde/arm/neon/cmla_rot180_lane.h @@ -40,46 +40,40 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot180_lane_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); - -#else simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif return simde_float16x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot180_lane_f16 -#define vcmla_rot180_lane_f16(r, a, b, lane) \ - simde_vcmla_rot180_lane_f16(r, a, b, lane) + #undef vcmla_rot180_lane_f16 + #define vcmla_rot180_lane_f16(r, a, b, lane) simde_vcmla_rot180_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot180_lane_f16(r, a, b, lane) vcmla_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -87,11 +81,6 @@ simde_float32x2_t simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmla_rot180_lane_f32(r, a, b, 0); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( @@ -111,60 +100,59 @@ simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, #endif return simde_float32x2_from_private(r_); -#endif } #if 
defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_lane_f32 #define vcmla_rot180_lane_f32(r, a, b, lane) \ simde_vcmla_rot180_lane_f32(r, a, b, lane) #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot180_lane_f32(r, a, b, lane) vcmla_rot180_lane_f32(r, a, b, lane) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot180_lane_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = 
simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif return simde_float16x8_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot180_lane_f16 -#define vcmlaq_rot180_lane_f16(r, a, b, lane) \ - simde_vcmlaq_rot180_lane_f16(r, a, b, lane) + #undef vcmlaq_rot180_lane_f16 + #define vcmlaq_rot180_lane_f16(r, a, b, lane) simde_vcmlaq_rot180_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot180_lane_f16(r, a, b, lane) vcmlaq_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -172,36 +160,34 @@ simde_float32x4_t simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmlaq_rot180_lane_f32(r, a, b, 0); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x2_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif return simde_float32x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot180_lane_f32 -#define vcmlaq_rot180_lane_f32(r, a, b, lane) \ - simde_vcmlaq_rot180_lane_f32(r, a, b, lane) + #undef vcmlaq_rot180_lane_f32 + #define vcmlaq_rot180_lane_f32(r, a, b, lane) simde_vcmlaq_rot180_lane_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot180_lane_f32(r, a, b, lane) vcmlaq_rot180_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -209,45 +195,41 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + 
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif return simde_float16x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot180_laneq_f16 -#define vcmla_rot180_laneq_f16(r, a, b, lane) \ - simde_vcmla_rot180_laneq_f16(r, a, b, lane) + #undef vcmla_rot180_laneq_f16 + #define vcmla_rot180_laneq_f16(r, a, b, lane) simde_vcmla_rot180_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot180_laneq_f16(r, a, b, lane) vcmla_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -255,37 +237,33 @@ simde_float32x2_t simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - 
SIMDE_CONSTIFY_2_(vcmla_rot180_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( simde_float32x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif return simde_float32x2_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot180_laneq_f32 -#define vcmla_rot180_laneq_f32(r, a, b, lane) \ - simde_vcmla_rot180_laneq_f32(r, a, b, lane) + #undef vcmla_rot180_laneq_f32 + #define vcmla_rot180_laneq_f32(r, a, b, lane) simde_vcmla_rot180_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot180_laneq_f32(r, a, b, lane) vcmla_rot180_laneq_f32(r, a, b, lane) 
#endif SIMDE_FUNCTION_ATTRIBUTES @@ -293,47 +271,43 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_4_(vcmlaq_rot180_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i + 1])) * + simde_float16_to_float32(a_.values[2 * i])); + } + #endif return simde_float16x8_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot180_laneq_f16 -#define vcmlaq_rot180_laneq_f16(r, a, b, lane) \ - simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) + #undef vcmlaq_rot180_laneq_f16 + #define vcmlaq_rot180_laneq_f16(r, a, b, lane) simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) vcmlaq_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -341,39 +315,33 @@ simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot180_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = 
simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif return simde_float32x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot180_laneq_f32 -#define vcmlaq_rot180_laneq_f32(r, a, b, lane) \ - simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) + #undef vcmlaq_rot180_laneq_f32 + #define vcmlaq_rot180_laneq_f32(r, a, b, lane) simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) vcmlaq_rot180_laneq_f32(r, a, b, lane) #endif - SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 
0967bcc9f..a5c1eb65f 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -40,45 +40,40 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot270_lane_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 
1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif return simde_float16x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot270_lane_f16 -#define vcmla_rot270_lane_f16(r, a, b, lane) \ - simde_vcmla_rot270_lane_f16(r, a, b, lane) + #undef vcmla_rot270_lane_f16 + #define vcmla_rot270_lane_f16(r, a, b, lane) simde_vcmla_rot270_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot270_lane_f16(r, a, b, lane) vcmla_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -86,36 +81,32 @@ simde_float32x2_t simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmla_rot270_lane_f32(r, a, b, 0); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( 
simde_float32x2_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } -#endif - + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif return simde_float32x2_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot270_lane_f32 -#define vcmla_rot270_lane_f32(r, a, b, lane) \ - simde_vcmla_rot270_lane_f32(r, a, b, lane) + #undef vcmla_rot270_lane_f32 + #define vcmla_rot270_lane_f32(r, a, b, lane) simde_vcmla_rot270_lane_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot270_lane_f32(r, a, b, lane) vcmla_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -123,47 +114,42 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_t result; 
-#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot270_lane_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = 
simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif return simde_float16x8_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot270_lane_f16 -#define vcmlaq_rot270_lane_f16(r, a, b, lane) \ - simde_vcmlaq_rot270_lane_f16(r, a, b, lane) + #undef vcmlaq_rot270_lane_f16 + #define vcmlaq_rot270_lane_f16(r, a, b, lane) simde_vcmlaq_rot270_lane_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot270_lane_f16(r, a, b, lane) vcmlaq_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -171,36 +157,32 @@ simde_float32x4_t simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmlaq_rot270_lane_f32(r, a, b, 0); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x2_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - 
r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } -#endif - + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif return simde_float32x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot270_lane_f32 -#define vcmlaq_rot270_lane_f32(r, a, b, lane) \ - simde_vcmlaq_rot270_lane_f32(r, a, b, lane) + #undef vcmlaq_rot270_lane_f32 + #define vcmlaq_rot270_lane_f32(r, a, b, lane) simde_vcmlaq_rot270_lane_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot270_lane_f32(r, a, b, lane) vcmlaq_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -208,45 +190,40 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - 
SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f16, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x4_private r_ = simde_float16x4_to_private(r), a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + 
(-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif return simde_float16x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot270_laneq_f16 -#define vcmla_rot270_laneq_f16(r, a, b, lane) \ - simde_vcmla_rot270_laneq_f16(r, a, b, lane) + #undef vcmla_rot270_laneq_f16 + #define vcmla_rot270_laneq_f16(r, a, b, lane) simde_vcmla_rot270_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot270_laneq_f16(r, a, b, lane) vcmla_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -254,13 +231,6 @@ simde_float32x2_t simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmla_rot270_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( @@ -279,12 +249,15 @@ simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, } #endif return simde_float32x2_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_laneq_f32 -#define vcmla_rot270_laneq_f32(r, a, b, lane) \ - simde_vcmla_rot270_laneq_f32(r, a, b, lane) +#define vcmla_rot270_laneq_f32(r, a, b, lane) simde_vcmla_rot270_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_rot270_laneq_f32(r, a, b, lane) vcmla_rot270_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -292,47 +265,42 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_4_(vcmlaq_rot270_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float16x8_private r_ = simde_float16x8_to_private(r), a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(simde_vdupq_n_f16( simde_float16x8_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } -#endif + #if 
defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i]) + + simde_float16_to_float32(b_.values[2 * i + 1]) * + simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i + 1] = simde_float16_from_float32( + simde_float16_to_float32(r_.values[2 * i + 1]) + + (-simde_float16_to_float32(b_.values[2 * i])) * + simde_float16_to_float32(a_.values[2 * i + 1])); + } + #endif return simde_float16x8_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot270_laneq_f16 -#define vcmlaq_rot270_laneq_f16(r, a, b, lane) \ - simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) + #undef vcmlaq_rot270_laneq_f16 + #define vcmlaq_rot270_laneq_f16(r, a, b, lane) simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) vcmlaq_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -340,37 +308,32 @@ simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_t result; -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || 
HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - SIMDE_CONSTIFY_2_(vcmlaq_rot270_laneq_f32, result, - (HEDLEY_UNREACHABLE(), result), lane, r, a, b); -#else simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(simde_vdupq_n_f32( simde_float32x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif return simde_float32x4_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmlaq_rot270_laneq_f32 -#define vcmlaq_rot270_laneq_f32(r, a, b, lane) \ - simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) + #undef vcmlaq_rot270_laneq_f32 + #define vcmlaq_rot270_laneq_f32(r, a, b, lane) simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + 
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) vcmlaq_rot270_laneq_f32(r, a, b, lane) #endif SIMDE_END_DECLS_ From 3eac558ebbc37d2dbb990144d1069ec3ecc2be2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 13:14:53 +0800 Subject: [PATCH 23/29] [Fix] : use another way to implement f16 functions --- simde/arm/neon/cmla_lane.h | 171 ++++++++++++------------ simde/arm/neon/cmla_rot180_lane.h | 127 +++++++++--------- simde/arm/neon/cmla_rot270_lane.h | 125 +++++++++--------- simde/arm/neon/cmla_rot90_lane.h | 211 +++++++++++++++--------------- 4 files changed, 321 insertions(+), 313 deletions(-) diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 7537aacf1..ec2c1d80e 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -31,6 +31,10 @@ #include "dup_lane.h" #include "mul.h" #include "types.h" +#include "get_high.h" +#include "get_low.h" +#include "combine.h" +#include "cvt.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ @@ -39,32 +43,32 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; + return 
simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_float16x4_from_private(r_); - } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_lane_f16 @@ -80,66 +84,64 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcmla_lane_f32(r, a, b, 0); -#else simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( simde_float32x2_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * 
sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } -#endif - + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); + i++) { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif return simde_float32x2_from_private(r_); -#endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_lane_f32 #define vcmla_lane_f32(r, a, b, lane) simde_vcmla_lane_f32(r, a, b, lane) #endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + #define simde_vcmla_lane_f32(r, a, b, lane) vcmla_lane_f32(r, a, b, lane) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + 
simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_laneq_f16 @@ -160,7 +162,6 @@ simde_float32x2_t simde_vcmla_laneq_f32(simde_float32x2_t r, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(simde_vdup_n_f32( simde_float32x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); r_.values += b_.values * a_.values; @@ -189,33 +190,29 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 
4, 6, 6); + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, + 0, 0, 2, 2, 4, 4, 6, 6); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - } + simde_float16x4_t high, low; + SIMDE_CONSTIFY_2_( + simde_vcmla_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); + SIMDE_CONSTIFY_2_( + simde_vcmla_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); + return simde_vcombine_f16(low, high); #endif - return simde_float16x8_from_private(r_); - } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_lane_f16 @@ -264,32 +261,30 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x8_private 
r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - simde_float16_to_float32(b_.values[lane]) * - simde_float16_to_float32(a_.values[2 * i])); - } + simde_float16x4_t high, low, b_ = simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]); + SIMDE_CONSTIFY_2_( + simde_vcmla_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); + SIMDE_CONSTIFY_2_( + simde_vcmla_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); + return simde_vcombine_f16(low, high); #endif - return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_laneq_f16 diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 7782c1fa5..d3e583dda 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -31,6 +31,10 @@ #include "dup_lane.h" #include "mul.h" #include "types.h" +#include "cvt.h" +#include "get_high.h" +#include "get_low.h" +#include "combine.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ @@ -40,31 +44,33 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, 
simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_lane_f16 @@ -117,33 +123,31 @@ 
simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); - } + simde_float16x4_t high, low; + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); + return simde_vcombine_f16(low, high); #endif - return 
simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_lane_f16 @@ -195,32 +199,33 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 
1]) * a_.values[2 * i]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_laneq_f16 @@ -271,34 +276,32 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, 4, 6, 6); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, 5, 6, 7); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i + 1])) * - simde_float16_to_float32(a_.values[2 * i])); - } + simde_float16x4_t high, low, b_ = simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, 
simde_vget_high_f16(r), simde_vget_high_f16(a), b_); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot180_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); + return simde_vcombine_f16(low, high); #endif - return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_laneq_f16 diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index a5c1eb65f..b9dc91c45 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -31,6 +31,10 @@ #include "dup_lane.h" #include "mul.h" #include "types.h" +#include "cvt.h" +#include "get_high.h" +#include "get_low.h" +#include "combine.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ @@ -40,31 +44,33 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + 
b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_lane_f16 @@ -114,33 +120,31 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - 
SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } + simde_float16x4_t high, low; + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); + return simde_vcombine_f16(low, high); #endif - return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_lane_f16 @@ -190,31 +194,33 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 
0, 7, 2); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_laneq_f16 @@ -265,33 +271,32 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); a_.values = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1])); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (-simde_float16_to_float32(b_.values[2 * i])) * - simde_float16_to_float32(a_.values[2 * i + 1])); - } + simde_float16x4_t high, low, b_ = simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot270_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); + return simde_vcombine_f16(low, high); #endif - return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_laneq_f16 diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index c625b132e..21a530812 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -29,6 +29,9 @@ #include "add.h" #include "cvt.h" +#include "get_high.h" +#include "get_low.h" +#include "combine.h" #include "dup_lane.h" #include "mul.h" #include "types.h" @@ -41,28 +44,33 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = 
- simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot90_lane_f16 @@ -110,61 +118,61 @@ simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, - simde_float16x4_t b, const int lane) +simde_float16x4_t +simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, + simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - 
simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); + simde_float16x4_private + r_ = simde_float16x4_to_private(r), + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; + return simde_float16x4_from_private(r_); #else + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif - return 
simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_lane_f16 - #define vcmlaq_rot90_lane_f16(r, a, b, lane) simde_vcmlaq_rot90_lane_f16(r, a, b, lane) + #undef vcmla_rot90_laneq_f16 + #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane) #endif #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot90_lane_f16(r, a, b, lane) vcmlaq_rot90_lane_f16(r, a, b, lane) + #define simde_vcmla_rot90_laneq_f16(r, a, b, lane) vcmla_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x2_to_private(b).values[lane])); +simde_float32x2_t +simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, + simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_private r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32( + simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE @@ -174,73 +182,72 @@ 
simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } #endif - return simde_float32x4_from_private(r_); + return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmlaq_rot90_lane_f32 - #define vcmlaq_rot90_lane_f32(r, a, b, lane) simde_vcmlaq_rot90_lane_f32(r, a, b, lane) + #undef vcmla_rot90_laneq_f32 + #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane) #endif #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_rot90_lane_f32(r, a, b, lane) vcmlaq_rot90_lane_f32(r, a, b, lane) + #define simde_vcmla_rot90_laneq_f32(r, a, b, lane) vcmla_rot90_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x8_t b, const int lane) +simde_float16x8_t +simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, + simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + 
simde_float16x4_to_private(b).values[lane])); + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - } + simde_float16x4_t high, low; + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); + return simde_vcombine_f16(low, high); #endif - return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_laneq_f16 - #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane) + #undef vcmlaq_rot90_lane_f16 + #define vcmlaq_rot90_lane_f16(r, a, b, lane) simde_vcmlaq_rot90_lane_f16(r, a, b, lane) #endif #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot90_laneq_f16(r, a, b, lane) vcmla_rot90_laneq_f16(r, a, b, lane) + #define simde_vcmlaq_rot90_lane_f16(r, a, b, lane) 
vcmlaq_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x4_to_private(b).values[lane])); +simde_float32x4_t +simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, + simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float32x4_private r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32( + simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE @@ -250,16 +257,16 @@ simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } #endif - return simde_float32x2_from_private(r_); + return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) - #undef vcmla_rot90_laneq_f32 - #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane) + #undef vcmlaq_rot90_lane_f32 + #define vcmlaq_rot90_lane_f32(r, a, b, lane) simde_vcmlaq_rot90_lane_f32(r, a, b, lane) #endif #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || 
SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmla_rot90_laneq_f32(r, a, b, lane) vcmla_rot90_laneq_f32(r, a, b, lane) + #define simde_vcmlaq_rot90_lane_f32(r, a, b, lane) vcmlaq_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -267,34 +274,32 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_float16x8_private r_ = simde_float16x8_to_private(r), + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(simde_vdupq_n_f16( + simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, 5, 7, 7); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); r_.values += b_.values * a_.values; + return simde_float16x8_from_private(r_); #else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i]) + - (-simde_float16_to_float32(b_.values[2 * i + 1]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - r_.values[2 * i + 1] = simde_float16_from_float32( - simde_float16_to_float32(r_.values[2 * i + 1]) + - (simde_float16_to_float32(b_.values[2 * i]) * - simde_float16_to_float32(a_.values[2 * i + 1]))); - } + simde_float16x4_t high, low, b_ = simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_lane_f16, high, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 
0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); + SIMDE_CONSTIFY_2_( + simde_vcmla_rot90_lane_f16, low, + (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), + 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); + return simde_vcombine_f16(low, high); #endif - return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot90_laneq_f16 From e966085581f2c36bffa4b62a48350a37dbc1cf6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 13:51:41 +0800 Subject: [PATCH 24/29] [Fix] : use implementation of f16 functions --- simde/arm/neon/cmla_lane.h | 42 ++++++++++++------------------- simde/arm/neon/cmla_rot180_lane.h | 42 ++++++++++++------------------- simde/arm/neon/cmla_rot270_lane.h | 42 ++++++++++++------------------- simde/arm/neon/cmla_rot90_lane.h | 38 ++++++++++++---------------- 4 files changed, 64 insertions(+), 100 deletions(-) diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index ec2c1d80e..7dc1047d2 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -43,32 +43,27 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, 
a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_lane_f16 @@ -116,32 +111,27 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - 
simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_laneq_f16 diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index d3e583dda..5e9617e9b 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -44,33 +44,28 @@ simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for 
(size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_lane_f16 @@ -199,33 +194,28 @@ simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } - return 
simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot180_laneq_f16 diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index b9dc91c45..96b2052bd 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -44,33 +44,28 @@ simde_float16x4_t simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } - return 
simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot270_lane_f16 @@ -194,33 +189,28 @@ simde_float16x4_t simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef 
vcmla_rot270_laneq_f16 diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 21a530812..355509f35 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -44,33 +44,28 @@ simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot90_lane_f16 
@@ -122,6 +117,13 @@ simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private r_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = + simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) @@ -132,23 +134,15 @@ simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; - return simde_float16x4_from_private(r_); #else - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } - return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); #endif + return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot90_laneq_f16 From 20131b35a218466eb7548b80792d4b40929e2046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 14:20:15 +0800 Subject: [PATCH 25/29] [Fix] : delete conflicting type --- simde/arm/neon/cmla_rot90_lane.h | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 355509f35..c78d51cc7 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -127,10 +127,6 @@ simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x4_private - r_ = simde_float16x4_to_private(r), - a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane])); a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; From 0c531c38d627ad3e4bc34d2163f0449b0c52bfad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 15:08:30 +0800 Subject: [PATCH 26/29] [Fix] : another implementation for vcmla{/q}_rot{180/270/90}_lane{/q}_f16 and vcmla{/q}_lane{/q}_f16 --- simde/arm/neon/cmla_lane.h | 63 ++++++++++++++++++++----------- simde/arm/neon/cmla_rot180_lane.h | 63 ++++++++++++++++++++----------- simde/arm/neon/cmla_rot270_lane.h | 63 ++++++++++++++++++++----------- simde/arm/neon/cmla_rot90_lane.h | 63 ++++++++++++++++++++----------- 4 files changed, 168 insertions(+), 84 deletions(-) diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 7dc1047d2..3ec5b167e 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -192,16 +192,27 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low; - SIMDE_CONSTIFY_2_( - simde_vcmla_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); - 
SIMDE_CONSTIFY_2_( - simde_vcmla_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += b_.values[lane] * a_low.values[2 * i]; + r_low.values[2 * i + 1] += b_.values[lane] * a_low.values[2 * i]; + r_high.values[2 * i] += b_.values[lane] * a_high.values[2 * i]; + r_high.values[2 * i + 1] += b_.values[lane] * a_high.values[2 * i]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) @@ -263,17 +274,27 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low, b_ = simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]); - SIMDE_CONSTIFY_2_( - simde_vcmla_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); - SIMDE_CONSTIFY_2_( - simde_vcmla_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + 
simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += b_.values[lane] * a_low.values[2 * i]; + r_low.values[2 * i + 1] += b_.values[lane] * a_low.values[2 * i]; + r_high.values[2 * i] += b_.values[lane] * a_high.values[2 * i]; + r_high.values[2 * i + 1] += b_.values[lane] * a_high.values[2 * i]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 5e9617e9b..ee2f54366 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -132,16 +132,27 @@ simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + 
simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += -(b_.values[2 * i]) * a_low.values[2 * i]; + r_low.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_low.values[2 * i]; + r_high.values[2 * i] += -(b_.values[2 * i]) * a_high.values[2 * i]; + r_high.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_high.values[2 * i]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) @@ -280,17 +291,27 @@ simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low, b_ = simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + 
simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += -(b_.values[2 * i]) * a_low.values[2 * i]; + r_low.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_low.values[2 * i]; + r_high.values[2 * i] += -(b_.values[2 * i]) * a_high.values[2 * i]; + r_high.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_high.values[2 * i]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 96b2052bd..54e584ec0 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -129,16 +129,27 @@ simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = 
simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += b_.values[2 * i + 1] * a_low.values[2 * i + 1]; + r_low.values[2 * i + 1] += -(b_.values[2 * i]) * a_low.values[2 * i + 1]; + r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; + r_high.values[2 * i + 1] += -(b_.values[2 * i]) * a_high.values[2 * i + 1]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) @@ -275,17 +286,27 @@ simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low, b_ = simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * 
sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += b_.values[2 * i + 1] * a_low.values[2 * i + 1]; + r_low.values[2 * i + 1] += -(b_.values[2 * i]) * a_low.values[2 * i + 1]; + r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; + r_high.values[2 * i + 1] += -(b_.values[2 * i]) * a_high.values[2 * i + 1]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index c78d51cc7..11b4254d3 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -203,16 +203,27 @@ simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_high_f16(r), simde_vget_high_f16(a), b); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - lane, simde_vget_low_f16(r), simde_vget_low_f16(a), b); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x4_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += -(b_.values[2 * i + 
1]) * a_low.values[2 * i + 1]; + r_low.values[2 * i + 1] += b_.values[2 * i] * a_low.values[2 * i + 1]; + r_high.values[2 * i] += -(b_.values[2 * i + 1]) * a_high.values[2 * i + 1]; + r_high.values[2 * i + 1] += b_.values[2 * i] * a_high.values[2 * i + 1]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) @@ -278,17 +289,27 @@ simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_.values += b_.values * a_.values; return simde_float16x8_from_private(r_); #else - simde_float16x4_t high, low, b_ = simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_lane_f16, high, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_high_f16(r), simde_vget_high_f16(a), b_); - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_lane_f16, low, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - 0, simde_vget_low_f16(r), simde_vget_low_f16(a), b_); - return simde_vcombine_f16(low, high); + simde_float32x4_private r_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = + simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16( + simde_float16x8_to_private(b).values[lane]))); + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); + i++) { + r_low.values[2 * i] += -(b_.values[2 * i + 1]) * a_low.values[2 * i + 1]; + r_low.values[2 * i + 1] += b_.values[2 * i] * a_low.values[2 * i + 1]; + r_high.values[2 * i] += -(b_.values[2 * i + 1]) * a_high.values[2 * i + 1]; + r_high.values[2 * i + 
1] += b_.values[2 * i] * a_high.values[2 * i + 1]; + } + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) From 788e06e5d201cba795410210a1a74dccc012b511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 15:55:29 +0800 Subject: [PATCH 27/29] [Fix] : implementation for vcmla{/q}_rot{180/270/90}_lane{/q}_f16 and vcmla{/q}_lane{/q}_f16 --- simde/arm/neon/cmla_lane.h | 56 +++++++++++++-------------- simde/arm/neon/cmla_rot180_lane.h | 64 +++++++++++++++---------------- simde/arm/neon/cmla_rot270_lane.h | 64 +++++++++++++++---------------- simde/arm/neon/cmla_rot90_lane.h | 64 +++++++++++++++---------------- 4 files changed, 116 insertions(+), 132 deletions(-) diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 3ec5b167e..5701ab757 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -180,19 +180,7 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, - 0, 0, 2, 2, 4, 4, 6, 6); - r_.values += b_.values * a_.values; - return simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -203,6 +191,16 @@ 
simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, + 0, 0, 2, 2, 4, 4, 6, 6); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, + 0, 0, 2, 2, 4, 4, 6, 6); + r_low.values += b_.values * a_low.values; + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -211,9 +209,9 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, r_high.values[2 * i] += b_.values[lane] * a_high.values[2 * i]; r_high.values[2 * i + 1] += b_.values[lane] * a_high.values[2 * i]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_lane_f16 @@ -262,19 +260,7 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - r_.values += b_.values * a_.values; - return 
simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -285,6 +271,16 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_low.values += b_.values * a_low.values; + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 0, 0, 2, 2, 4, + 4, 6, 6); + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -293,9 +289,9 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, r_high.values[2 * i] += b_.values[lane] * a_high.values[2 * i]; r_high.values[2 * i + 1] += b_.values[lane] * a_high.values[2 * i]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_laneq_f16 diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index ee2f54366..3e882afa0 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -118,21 +118,7 @@ simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - 
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); - r_.values += b_.values * a_.values; - return simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -143,6 +129,18 @@ simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 0, 0, 2, 2, 4, + 4, 6, 6); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_low.values += b_.values * a_low.values; + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -151,9 +149,9 @@ simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values[2 * i] += -(b_.values[2 * i]) * a_high.values[2 * i]; r_high.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_high.values[2 * i]; } 
- return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_lane_f16 @@ -277,21 +275,7 @@ simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); - r_.values += b_.values * a_.values; - return simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -302,6 +286,18 @@ simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 0, 0, 2, 2, 4, + 4, 6, 6); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, 
a_high.values, a_high.values, 0, 0, 2, 2, 4, + 4, 6, 6); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, + 5, 6, 7); + r_low.values += b_.values * a_low.values; + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -310,9 +306,9 @@ simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values[2 * i] += -(b_.values[2 * i]) * a_high.values[2 * i]; r_high.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_high.values[2 * i]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_laneq_f16 diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 54e584ec0..8faddc51e 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -115,21 +115,7 @@ simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values += b_.values * a_.values; - return 
simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -140,6 +126,18 @@ simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, + 5, 7, 7); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_low.values += b_.values * a_low.values; + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -148,9 +146,9 @@ simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; r_high.values[2 * i + 1] += -(b_.values[2 * i]) * a_high.values[2 * i + 1]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_lane_f16 @@ -272,21 +270,7 @@ simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if 
defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values += b_.values * a_.values; - return simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -297,6 +281,18 @@ simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, + 5, 7, 7); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, + 13, 4, 15, 6); + r_high.values += b_.values * a_high.values; + r_low.values += b_.values * a_low.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -305,9 +301,9 @@ simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; r_high.values[2 * i + 1] += -(b_.values[2 * i]) * a_high.values[2 * i 
+ 1]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_laneq_f16 diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 11b4254d3..0e9ec8d4b 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -189,21 +189,7 @@ simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x4_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values += b_.values * a_.values; - return simde_float16x8_from_private(r_); - #else - simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -214,6 +200,18 @@ simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + 
(SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, + 5, 7, 7); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_low.values += b_.values * a_low.values; + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -222,9 +220,9 @@ simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values[2 * i] += -(b_.values[2 * i + 1]) * a_high.values[2 * i + 1]; r_high.values[2 * i + 1] += b_.values[2 * i] * a_high.values[2 * i + 1]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot90_lane_f16 @@ -275,21 +273,7 @@ simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - simde_float16x8_private r_ = simde_float16x8_to_private(r), - a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(simde_vdupq_n_f16( - simde_float16x8_to_private(b).values[lane])); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values += b_.values * a_.values; - return simde_float16x8_from_private(r_); - #else - 
simde_float32x4_private r_low = + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), @@ -300,6 +284,18 @@ simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, b_ = simde_float32x4_to_private( simde_vcvt_f32_f16(simde_vdup_n_f16( simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, + 5, 7, 7); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, + 5, 7, 7); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, + 5, 12, 7, 14); + r_low.values += b_.values * a_low.values; + r_high.values += b_.values * a_high.values; + #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) { @@ -308,9 +304,9 @@ simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values[2 * i] += -(b_.values[2 * i + 1]) * a_high.values[2 * i + 1]; r_high.values[2 * i + 1] += b_.values[2 * i] * a_high.values[2 * i + 1]; } - return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); #endif + return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot90_laneq_f16 From 790d47171f0f34e9178f4adfdd0f78b9ef4cb5c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: Tue, 17 Oct 2023 16:14:03 +0800 Subject: [PATCH 28/29] [Fix] : elements in shuffle vector --- simde/arm/neon/cmla_lane.h | 12 ++++-------- simde/arm/neon/cmla_rot180_lane.h 
| 18 ++++++------------ simde/arm/neon/cmla_rot270_lane.h | 18 ++++++------------ simde/arm/neon/cmla_rot90_lane.h | 18 ++++++------------ 4 files changed, 22 insertions(+), 44 deletions(-) diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 5701ab757..2b20c82e7 100644 --- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -194,10 +194,8 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, - 0, 0, 2, 2, 4, 4, 6, 6); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, - 0, 0, 2, 2, 4, 4, 6, 6); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else @@ -274,11 +272,9 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 0, 0, 2, 2, 4, - 4, 6, 6); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); r_low.values += b_.values * a_low.values; - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 0, 0, 2, 2, 4, - 4, 6, 6); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 3e882afa0..328986f9e 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -132,12 +132,9 @@ 
simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 0, 0, 2, 2, 4, - 4, 6, 6); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else @@ -289,12 +286,9 @@ simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 0, 0, 2, 2, 4, - 4, 6, 6); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 0, 0, 2, 2, 4, - 4, 6, 6); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 0, 1, 2, 3, 4, - 5, 6, 7); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index 8faddc51e..e2d690f8e 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ 
b/simde/arm/neon/cmla_rot270_lane.h @@ -129,12 +129,9 @@ simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, - 5, 7, 7); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else @@ -284,12 +281,9 @@ simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, - 5, 7, 7); - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_high.values += b_.values * a_high.values; r_low.values += b_.values * a_low.values; #else diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index 0e9ec8d4b..af9f044ef 100644 --- 
a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -203,12 +203,9 @@ simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, - 5, 7, 7); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else @@ -287,12 +284,9 @@ simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, #if defined(SIMDE_SHUFFLE_VECTOR_) && \ ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_low.values, a_low.values, 1, 1, 3, 3, 5, - 5, 7, 7); - a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_high.values, a_high.values, 1, 1, 3, 3, 5, - 5, 7, 7); - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); + a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); + a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else From 19ed113857425207b1729c2fc1560d94a8981f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=AD=A3=E8=91=B3?= Date: 
Tue, 17 Oct 2023 19:21:09 +0800 Subject: [PATCH 29/29] [Fix] : formatting with ColumnLimit = 125, IdentWidth = 2, TabWidth = 4 --- simde/arm/neon/cadd_rot270.h | 255 +++--- simde/arm/neon/cadd_rot90.h | 250 +++--- simde/arm/neon/cmla_lane.h | 258 +++---- simde/arm/neon/cmla_rot180_lane.h | 262 +++---- simde/arm/neon/cmla_rot270_lane.h | 253 +++--- simde/arm/neon/cmla_rot90_lane.h | 229 +++--- test/arm/neon/cadd_rot270.c | 10 +- test/arm/neon/cadd_rot90.c | 441 +++++------ test/arm/neon/cmla_lane.c | 1199 +++++++++++------------------ test/arm/neon/cmla_rot180_lane.c | 1185 +++++++++++----------------- test/arm/neon/cmla_rot270_lane.c | 1197 +++++++++++----------------- test/arm/neon/cmla_rot90_lane.c | 1180 +++++++++++----------------- 12 files changed, 2683 insertions(+), 4036 deletions(-) diff --git a/simde/arm/neon/cadd_rot270.h b/simde/arm/neon/cadd_rot270.h index 5b1fad11c..129a6a91a 100644 --- a/simde/arm/neon/cadd_rot270.h +++ b/simde/arm/neon/cadd_rot270.h @@ -34,164 +34,147 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, - simde_float16x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot270_f16(a, b); -#else - simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], 
a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16( - simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1]); - } -#endif - return simde_float16x4_from_private(r_); -#endif +simde_float16x4_t simde_vcadd_rot270_f16(simde_float16x4_t a, simde_float16x4_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot270_f16(a, b); + #else + simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), a_.values[2 * i + 1]); + } + #endif + return simde_float16x4_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcadd_rot270_f16 -#define vcadd_rot270_f16(a, b) simde_vcadd_rot270_f16(a, b) + #undef vcadd_rot270_f16 + #define vcadd_rot270_f16(a, b) simde_vcadd_rot270_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, - simde_float16x8_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot270_f16(a, b); -#else - 
simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, - 13, 4, 15, 6); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); - r_.values[2 * i + 1] = simde_vaddh_f16( - simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), - a_.values[2 * i + 1]); - } -#endif - return simde_float16x8_from_private(r_); -#endif +simde_float16x8_t simde_vcaddq_rot270_f16(simde_float16x8_t a, simde_float16x8_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot270_f16(a, b); + #else + simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 9, 0, 11, 2, 13, 4, 15, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = simde_vaddh_f16(b_.values[2 * i + 1], a_.values[2 * i]); + r_.values[2 * i + 1] = + simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i])), a_.values[2 * i + 1]); + } + #endif + return simde_float16x8_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcaddq_rot270_f16 -#define 
vcaddq_rot270_f16(a, b) simde_vcaddq_rot270_f16(a, b) + #undef vcaddq_rot270_f16 + #define vcaddq_rot270_f16(a, b) simde_vcaddq_rot270_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcadd_rot270_f32(simde_float32x2_t a, - simde_float32x2_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot270_f32(a, b); -#else - simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } -#endif - - return simde_float32x2_from_private(r_); -#endif +simde_float32x2_t simde_vcadd_rot270_f32(simde_float32x2_t a, simde_float32x2_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot270_f32(a, b); + #else + simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + 
#endif + return simde_float32x2_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcadd_rot270_f32 -#define vcadd_rot270_f32(a, b) simde_vcadd_rot270_f32(a, b) + #undef vcadd_rot270_f32 + #define vcadd_rot270_f32(a, b) simde_vcadd_rot270_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcaddq_rot270_f32(simde_float32x4_t a, - simde_float32x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot270_f32(a, b); -#else - simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } -#endif - - return simde_float32x4_from_private(r_); -#endif +simde_float32x4_t simde_vcaddq_rot270_f32(simde_float32x4_t a, simde_float32x4_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot270_f32(a, b); + #else + simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = b_.values[2 * i + 
1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + return simde_float32x4_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcaddq_rot270_f32 -#define vcaddq_rot270_f32(a, b) simde_vcaddq_rot270_f32(a, b) + #undef vcaddq_rot270_f32 + #define vcaddq_rot270_f32(a, b) simde_vcaddq_rot270_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float64x2_t simde_vcaddq_rot270_f64(simde_float64x2_t a, - simde_float64x2_t b) { -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot270_f64(a, b); -#else - simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; - r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; - } -#endif - - return simde_float64x2_from_private(r_); -#endif +simde_float64x2_t simde_vcaddq_rot270_f64(simde_float64x2_t a, simde_float64x2_t b) +{ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot270_f64(a, b); + #else + simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < 
(sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = b_.values[2 * i + 1] + a_.values[2 * i]; + r_.values[2 * i + 1] = -(b_.values[2 * i]) + a_.values[2 * i + 1]; + } + #endif + return simde_float64x2_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcaddq_rot270_f64 -#define vcaddq_rot270_f64(a, b) simde_vcaddq_rot270_f64(a, b) + #undef vcaddq_rot270_f64 + #define vcaddq_rot270_f64(a, b) simde_vcaddq_rot270_f64(a, b) #endif SIMDE_END_DECLS_ diff --git a/simde/arm/neon/cadd_rot90.h b/simde/arm/neon/cadd_rot90.h index 5d6607e11..b03a04015 100644 --- a/simde/arm/neon/cadd_rot90.h +++ b/simde/arm/neon/cadd_rot90.h @@ -34,34 +34,29 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, - simde_float16x4_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot90_f16(a, b); -#else - simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), - b_ = simde_float16x4_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } -#endif - return simde_float16x4_from_private(r_); -#endif +simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, simde_float16x4_t b) +{ + #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot90_f16(a, b); + #else + simde_float16x4_private r_, a_ = simde_float16x4_to_private(a), b_ = simde_float16x4_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = + simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i + 1])), a_.values[2 * i]); + r_.values[2 * i + 1] = simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); + } + #endif + return simde_float16x4_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcadd_rot90_f16 @@ -69,128 +64,113 @@ simde_float16x4_t simde_vcadd_rot90_f16(simde_float16x4_t a, #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, - simde_float16x8_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot90_f16(a, b); -#else - simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), - b_ = simde_float16x8_to_private(b); -#if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) - b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, - 5, 12, 7, 14); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < 
(sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = - simde_vaddh_f16(simde_float16_from_float32( - -simde_float16_to_float32(b_.values[2 * i + 1])), - a_.values[2 * i]); - r_.values[2 * i + 1] = - simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); - } -#endif - return simde_float16x8_from_private(r_); -#endif +simde_float16x8_t simde_vcaddq_rot90_f16(simde_float16x8_t a, simde_float16x8_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot90_f16(a, b); + #else + simde_float16x8_private r_, a_ = simde_float16x8_to_private(a), b_ = simde_float16x8_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + b_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, -b_.values, b_.values, 1, 8, 3, 10, 5, 12, 7, 14); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = + simde_vaddh_f16(simde_float16_from_float32(-simde_float16_to_float32(b_.values[2 * i + 1])), a_.values[2 * i]); + r_.values[2 * i + 1] = simde_vaddh_f16(b_.values[2 * i], a_.values[2 * i + 1]); + } + #endif + return simde_float16x8_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcaddq_rot90_f16 -#define vcaddq_rot90_f16(a, b) simde_vcaddq_rot90_f16(a, b) + #undef vcaddq_rot90_f16 + #define vcaddq_rot90_f16(a, b) simde_vcaddq_rot90_f16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcadd_rot90_f32(simde_float32x2_t a, - simde_float32x2_t b) { -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || 
SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcadd_rot90_f32(a, b); -#else - simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } -#endif - - return simde_float32x2_from_private(r_); -#endif +simde_float32x2_t simde_vcadd_rot90_f32(simde_float32x2_t a, simde_float32x2_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcadd_rot90_f32(a, b); + #else + simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + return simde_float32x2_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcadd_rot90_f32 -#define vcadd_rot90_f32(a, b) simde_vcadd_rot90_f32(a, b) + #undef vcadd_rot90_f32 + #define vcadd_rot90_f32(a, b) simde_vcadd_rot90_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcaddq_rot90_f32(simde_float32x4_t a, - simde_float32x4_t b) { -#if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot90_f32(a, b); -#else - simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } -#endif - - return simde_float32x4_from_private(r_); -#endif +simde_float32x4_t simde_vcaddq_rot90_f32(simde_float32x4_t a, simde_float32x4_t b) +{ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot90_f32(a, b); + #else + simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + return simde_float32x4_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcaddq_rot90_f32 -#define vcaddq_rot90_f32(a, b) simde_vcaddq_rot90_f32(a, b) + #undef vcaddq_rot90_f32 + #define vcaddq_rot90_f32(a, b) simde_vcaddq_rot90_f32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES 
-simde_float64x2_t simde_vcaddq_rot90_f64(simde_float64x2_t a, - simde_float64x2_t b) { -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ - (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - return vcaddq_rot90_f64(a, b); -#else - simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), - b_ = simde_float64x2_to_private(b); - -#if defined(SIMDE_SHUFFLE_VECTOR_) - b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); - r_.values = b_.values + a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; - r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; - } -#endif - - return simde_float64x2_from_private(r_); -#endif +simde_float64x2_t simde_vcaddq_rot90_f64(simde_float64x2_t a, simde_float64x2_t b) +{ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) + return vcaddq_rot90_f64(a, b); + #else + simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); + r_.values = b_.values + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] = -(b_.values[2 * i + 1]) + a_.values[2 * i]; + r_.values[2 * i + 1] = b_.values[2 * i] + a_.values[2 * i + 1]; + } + #endif + return simde_float64x2_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcaddq_rot90_f64 diff --git a/simde/arm/neon/cmla_lane.h b/simde/arm/neon/cmla_lane.h index 2b20c82e7..4355bf7a5 100644 
--- a/simde/arm/neon/cmla_lane.h +++ b/simde/arm/neon/cmla_lane.h @@ -28,37 +28,33 @@ #define SIMDE_ARM_NEON_CMLA_LANE_H #include "add.h" +#include "combine.h" +#include "cvt.h" #include "dup_lane.h" -#include "mul.h" -#include "types.h" #include "get_high.h" #include "get_low.h" -#include "combine.h" -#include "cvt.h" +#include "mul.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += 
b_.values[lane] * a_.values[2 * i]; } @@ -66,30 +62,28 @@ simde_float16x4_t simde_vcmla_lane_f16(simde_float16x4_t r, simde_float16x4_t a, return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_lane_f16 -#define vcmla_lane_f16(r, a, b, lane) simde_vcmla_lane_f16(r, a, b, lane) + #undef vcmla_lane_f16 + #define vcmla_lane_f16(r, a, b, lane) simde_vcmla_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_lane_f16(r, a, b, lane) vcmla_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x2_to_private(b).values[lane])); +simde_float32x2_t simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { 
r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } @@ -97,36 +91,31 @@ simde_float32x2_t simde_vcmla_lane_f32(simde_float32x2_t r, simde_float32x2_t a, return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_lane_f32 -#define vcmla_lane_f32(r, a, b, lane) simde_vcmla_lane_f32(r, a, b, lane) + #undef vcmla_lane_f32 + #define vcmla_lane_f32(r, a, b, lane) simde_vcmla_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_lane_f32(r, a, b, lane) vcmla_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, - simde_float16x4_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = simde_float32x4_to_private( + 
simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } @@ -137,28 +126,25 @@ simde_float16x4_t simde_vcmla_laneq_f16(simde_float16x4_t r, #undef vcmla_laneq_f16 #define vcmla_laneq_f16(r, a, b, lane) simde_vcmla_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_laneq_f16(r, a, b, lane) vcmla_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t simde_vcmla_laneq_f32(simde_float32x2_t r, - simde_float32x2_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x4_to_private(b).values[lane])); +simde_float32x2_t simde_vcmla_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = 
simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } @@ -169,39 +155,32 @@ simde_float32x2_t simde_vcmla_laneq_f32(simde_float32x2_t r, #undef vcmla_laneq_f32 #define vcmla_laneq_f32(r, a, b, lane) simde_vcmla_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_laneq_f32(r, a, b, lane) vcmla_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == 
SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); r_low.values += b_.values * a_low.values; r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += b_.values[lane] * a_low.values[2 * i]; r_low.values[2 * i + 1] += b_.values[lane] * a_low.values[2 * i]; r_high.values[2 * i] += b_.values[lane] * a_high.values[2 * i]; @@ -209,34 +188,31 @@ simde_float16x8_t simde_vcmlaq_lane_f16(simde_float16x8_t r, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_lane_f16 #define vcmlaq_lane_f16(r, a, b, lane) simde_vcmlaq_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && 
\ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_lane_f16(r, a, b, lane) vcmlaq_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_lane_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x2_to_private(b).values[lane])); +simde_float32x4_t simde_vcmlaq_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; } @@ -247,39 +223,32 @@ simde_float32x4_t simde_vcmlaq_lane_f32(simde_float32x4_t r, #undef vcmlaq_lane_f32 #define vcmlaq_lane_f32(r, a, b, lane) simde_vcmlaq_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) - #define simde_vcmlaq_lane_f32(r, a, b, lane) vcmlaq_lane_f32(r, a, b, 0); + #define simde_vcmlaq_lane_f32(r, a, b, lane) vcmlaq_lane_f32(r, a, b, 0); #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, - simde_float16x8_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, 
a_low.values, a_low.values, 0, 0, 2, 2); r_low.values += b_.values * a_low.values; a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += b_.values[lane] * a_low.values[2 * i]; r_low.values[2 * i + 1] += b_.values[lane] * a_low.values[2 * i]; r_high.values[2 * i] += b_.values[lane] * a_high.values[2 * i]; @@ -287,47 +256,44 @@ simde_float16x8_t simde_vcmlaq_laneq_f16(simde_float16x8_t r, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_laneq_f16 #define vcmlaq_laneq_f16(r, a, b, lane) simde_vcmlaq_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_laneq_f16(r, a, b, lane) vcmlaq_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t simde_vcmlaq_laneq_f32(simde_float32x4_t r, - simde_float32x4_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x4_to_private(b).values[lane])); +simde_float32x4_t simde_vcmlaq_laneq_f32(simde_float32x4_t r, 
simde_float32x4_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); -#if defined(SIMDE_SHUFFLE_VECTOR_) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; - r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; - } -#endif + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] += b_.values[lane] * a_.values[2 * i]; + r_.values[2 * i + 1] += b_.values[lane] * a_.values[2 * i]; + } + #endif return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_laneq_f32 #define vcmlaq_laneq_f32(r, a, b, lane) simde_vcmlaq_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_laneq_f32(r, a, b, lane) vcmlaq_laneq_f32(r, a, b, lane) #endif diff --git a/simde/arm/neon/cmla_rot180_lane.h b/simde/arm/neon/cmla_rot180_lane.h index 328986f9e..d72225917 100644 --- a/simde/arm/neon/cmla_rot180_lane.h +++ b/simde/arm/neon/cmla_rot180_lane.h @@ -28,41 +28,36 @@ #define 
SIMDE_ARM_NEON_CMLA_ROT180_LANE_H #include "add.h" -#include "dup_lane.h" -#include "mul.h" -#include "types.h" +#include "combine.h" #include "cvt.h" +#include "dup_lane.h" #include "get_high.h" #include "get_low.h" -#include "combine.h" +#include "mul.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), +simde_float16x4_t simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * 
a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } #endif return simde_vcvt_f16_f32(simde_float32x4_from_private(r_)); @@ -71,67 +66,54 @@ simde_vcmla_rot180_lane_f16(simde_float16x4_t r, simde_float16x4_t a, #undef vcmla_rot180_lane_f16 #define vcmla_rot180_lane_f16(r, a, b, lane) simde_vcmla_rot180_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot180_lane_f16(r, a, b, lane) vcmla_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x2_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; - r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; - } -#endif - +simde_float32x2_t simde_vcmla_rot180_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = 
simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef vcmla_rot180_lane_f32 -#define vcmla_rot180_lane_f32(r, a, b, lane) \ - simde_vcmla_rot180_lane_f32(r, a, b, lane) + #undef vcmla_rot180_lane_f32 + #define vcmla_rot180_lane_f32(r, a, b, lane) simde_vcmla_rot180_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot180_lane_f32(r, a, b, lane) vcmla_rot180_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - 
simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); @@ -139,8 +121,8 @@ simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += -(b_.values[2 * i]) * a_low.values[2 * i]; r_low.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_low.values[2 * i]; r_high.values[2 * i] += -(b_.values[2 * i]) * a_high.values[2 * i]; @@ 
-148,75 +130,65 @@ simde_vcmlaq_rot180_lane_f16(simde_float16x8_t r, simde_float16x8_t a, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_lane_f16 #define vcmlaq_rot180_lane_f16(r, a, b, lane) simde_vcmlaq_rot180_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot180_lane_f16(r, a, b, lane) vcmlaq_rot180_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x2_to_private(b).values[lane])); - +simde_float32x4_t simde_vcmlaq_rot180_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; #else 
SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } #endif - return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_lane_f32 #define vcmlaq_rot180_lane_f32(r, a, b, lane) simde_vcmlaq_rot180_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot180_lane_f32(r, a, b, lane) vcmlaq_rot180_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x4_t simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), + b_ = 
simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } @@ -227,30 +199,26 @@ simde_vcmla_rot180_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, #undef vcmla_rot180_laneq_f16 #define vcmla_rot180_laneq_f16(r, a, b, lane) simde_vcmla_rot180_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot180_laneq_f16(r, a, b, lane) vcmla_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x4_to_private(b).values[lane])); - +simde_float32x2_t simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t 
b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } @@ -261,31 +229,24 @@ simde_vcmla_rot180_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, #undef vcmla_rot180_laneq_f32 #define vcmla_rot180_laneq_f32(r, a, b, lane) simde_vcmla_rot180_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot180_laneq_f32(r, a, b, lane) vcmla_rot180_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - 
a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 0, 0, 2, 2); a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 0, 1, 2, 3); @@ -293,8 +254,8 @@ simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += -(b_.values[2 * i]) * a_low.values[2 * i]; r_low.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_low.values[2 * i]; r_high.values[2 * i] += -(b_.values[2 * i]) * 
a_high.values[2 * i]; @@ -302,35 +263,32 @@ simde_vcmlaq_rot180_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot180_laneq_f16 #define vcmlaq_rot180_laneq_f16(r, a, b, lane) simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot180_laneq_f16(r, a, b, lane) vcmlaq_rot180_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x4_to_private(b).values[lane])); +simde_float32x4_t simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 0, 1, 2, 3); r_.values += 
b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; } @@ -341,8 +299,8 @@ simde_vcmlaq_rot180_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, #undef vcmlaq_rot180_laneq_f32 #define vcmlaq_rot180_laneq_f32(r, a, b, lane) simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot180_laneq_f32(r, a, b, lane) vcmlaq_rot180_laneq_f32(r, a, b, lane) #endif diff --git a/simde/arm/neon/cmla_rot270_lane.h b/simde/arm/neon/cmla_rot270_lane.h index e2d690f8e..d8d64dd38 100644 --- a/simde/arm/neon/cmla_rot270_lane.h +++ b/simde/arm/neon/cmla_rot270_lane.h @@ -28,39 +28,34 @@ #define SIMDE_ARM_NEON_CMLA_ROT270_LANE_H #include "add.h" -#include "dup_lane.h" -#include "mul.h" -#include "types.h" +#include "combine.h" #include "cvt.h" +#include "dup_lane.h" #include "get_high.h" #include "get_low.h" -#include "combine.h" +#include "mul.h" +#include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), +simde_float16x4_t 
simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } @@ -71,29 +66,26 @@ simde_vcmla_rot270_lane_f16(simde_float16x4_t r, simde_float16x4_t a, #undef vcmla_rot270_lane_f16 #define vcmla_rot270_lane_f16(r, a, b, lane) simde_vcmla_rot270_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot270_lane_f16(r, 
a, b, lane) vcmla_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x2_to_private(b).values[lane])); +simde_float32x2_t simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } @@ -104,31 +96,24 @@ simde_vcmla_rot270_lane_f32(simde_float32x2_t r, simde_float32x2_t a, #undef vcmla_rot270_lane_f32 #define vcmla_rot270_lane_f32(r, a, b, lane) simde_vcmla_rot270_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define 
simde_vcmla_rot270_lane_f32(r, a, b, lane) vcmla_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, 
-b_.values, b_.values, 5, 0, 7, 2); @@ -136,8 +121,8 @@ simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += b_.values[2 * i + 1] * a_low.values[2 * i + 1]; r_low.values[2 * i + 1] += -(b_.values[2 * i]) * a_low.values[2 * i + 1]; r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; @@ -145,35 +130,32 @@ simde_vcmlaq_rot270_lane_f16(simde_float16x8_t r, simde_float16x8_t a, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_lane_f16 #define vcmlaq_rot270_lane_f16(r, a, b, lane) simde_vcmlaq_rot270_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot270_lane_f16(r, a, b, lane) vcmlaq_rot270_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x2_to_private(b).values[lane])); +simde_float32x4_t 
simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } @@ -184,34 +166,29 @@ simde_vcmlaq_rot270_lane_f32(simde_float32x4_t r, simde_float32x4_t a, #undef vcmlaq_rot270_lane_f32 #define vcmlaq_rot270_lane_f32(r, a, b, lane) simde_vcmlaq_rot270_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot270_lane_f32(r, a, b, lane) vcmlaq_rot270_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), +simde_float16x4_t 
simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } @@ -222,65 +199,54 @@ simde_vcmla_rot270_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, #undef vcmla_rot270_laneq_f16 #define vcmla_rot270_laneq_f16(r, a, b, lane) simde_vcmla_rot270_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define 
simde_vcmla_rot270_laneq_f16(r, a, b, lane) vcmla_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x4_to_private(b).values[lane])); - -#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) - a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); - b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); - r_.values += b_.values * a_.values; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { - r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; - r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; - } -#endif +simde_float32x2_t simde_vcmla_rot270_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) -#undef 
vcmla_rot270_laneq_f32 -#define vcmla_rot270_laneq_f32(r, a, b, lane) simde_vcmla_rot270_laneq_f32(r, a, b, lane) + #undef vcmla_rot270_laneq_f32 + #define vcmla_rot270_laneq_f32(r, a, b, lane) simde_vcmla_rot270_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot270_laneq_f32(r, a, b, lane) vcmla_rot270_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = 
simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 5, 0, 7, 2); @@ -288,8 +254,8 @@ simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_low.values += b_.values * a_low.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += b_.values[2 * i + 1] * a_low.values[2 * i + 1]; r_low.values[2 * i + 1] += -(b_.values[2 * i]) * a_low.values[2 * i + 1]; r_high.values[2 * i] += b_.values[2 * i + 1] * a_high.values[2 * i + 1]; @@ -297,35 +263,32 @@ simde_vcmlaq_rot270_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot270_laneq_f16 #define vcmlaq_rot270_laneq_f16(r, a, b, lane) simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot270_laneq_f16(r, a, b, lane) vcmlaq_rot270_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x4_to_private(b).values[lane])); +simde_float32x4_t simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, + const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; } @@ -336,8 +299,8 @@ simde_vcmlaq_rot270_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, #undef vcmlaq_rot270_laneq_f32 #define vcmlaq_rot270_laneq_f32(r, a, b, lane) simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot270_laneq_f32(r, a, b, lane) vcmlaq_rot270_laneq_f32(r, a, b, lane) #endif diff --git a/simde/arm/neon/cmla_rot90_lane.h b/simde/arm/neon/cmla_rot90_lane.h index af9f044ef..45df8c0ed 100644 --- a/simde/arm/neon/cmla_rot90_lane.h +++ b/simde/arm/neon/cmla_rot90_lane.h @@ -28,11 +28,11 @@ #define SIMDE_ARM_NEON_CMLA_ROT90_LANE_H #include "add.h" +#include "combine.h" #include "cvt.h" +#include "dup_lane.h" #include "get_high.h" #include "get_low.h" -#include "combine.h" -#include "dup_lane.h" #include "mul.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -40,27 +40,22 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), +simde_float16x4_t simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = simde_float32x4_to_private(simde_vcvt_f32_f16(a)), b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + 
((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } @@ -71,69 +66,59 @@ simde_vcmla_rot90_lane_f16(simde_float16x4_t r, simde_float16x4_t a, #undef vcmla_rot90_lane_f16 #define vcmla_rot90_lane_f16(r, a, b, lane) simde_vcmla_rot90_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot90_lane_f16(r, a, b, lane) vcmla_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x2_to_private(b).values[lane])); - +simde_float32x2_t simde_vcmla_rot90_lane_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = 
simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } #endif - return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmla_rot90_lane_f32 #define vcmla_rot90_lane_f32(r, a, b, lane) simde_vcmla_rot90_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot90_lane_f32(r, a, b, lane) vcmla_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x4_t -simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(r)), - a_ = - simde_float32x4_to_private(simde_vcvt_f32_f16(a)), +simde_float16x4_t simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(simde_vcvt_f32_f16(r)), + a_ = 
simde_float32x4_to_private(simde_vcvt_f32_f16(a)), b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } @@ -144,21 +129,18 @@ simde_vcmla_rot90_laneq_f16(simde_float16x4_t r, simde_float16x4_t a, #undef vcmla_rot90_laneq_f16 #define vcmla_rot90_laneq_f16(r, a, b, lane) simde_vcmla_rot90_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot90_laneq_f16(r, a, b, lane) vcmla_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x2_t -simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, - simde_float32x4_t b, const int lane) - 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x2_private r_ = simde_float32x2_to_private(r), - a_ = simde_float32x2_to_private(a), - b_ = simde_float32x2_to_private(simde_vdup_n_f32( - simde_float32x4_to_private(b).values[lane])); +simde_float32x2_t simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x2_private r_ = simde_float32x2_to_private(r), a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(simde_vdup_n_f32(simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); @@ -166,8 +148,8 @@ simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } @@ -178,31 +160,24 @@ simde_vcmla_rot90_laneq_f32(simde_float32x2_t r, simde_float32x2_t a, #undef vcmla_rot90_laneq_f32 #define vcmla_rot90_laneq_f32(r, a, b, lane) simde_vcmla_rot90_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmla_rot90_laneq_f32(r, a, b, lane) vcmla_rot90_laneq_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, 
- simde_float16x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x4_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x4_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); @@ -210,8 +185,8 @@ simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i 
< (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += -(b_.values[2 * i + 1]) * a_low.values[2 * i + 1]; r_low.values[2 * i + 1] += b_.values[2 * i] * a_low.values[2 * i + 1]; r_high.values[2 * i] += -(b_.values[2 * i + 1]) * a_high.values[2 * i + 1]; @@ -219,36 +194,32 @@ simde_vcmlaq_rot90_lane_f16(simde_float16x8_t r, simde_float16x8_t a, } #endif return simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot90_lane_f16 #define vcmlaq_rot90_lane_f16(r, a, b, lane) simde_vcmlaq_rot90_lane_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot90_lane_f16(r, a, b, lane) vcmlaq_rot90_lane_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x2_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x2_to_private(b).values[lane])); - +simde_float32x4_t simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = 
simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x2_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } @@ -259,31 +230,24 @@ simde_vcmlaq_rot90_lane_f32(simde_float32x4_t r, simde_float32x4_t a, #undef vcmlaq_rot90_lane_f32 #define vcmlaq_rot90_lane_f32(r, a, b, lane) simde_vcmlaq_rot90_lane_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot90_lane_f32(r, a, b, lane) vcmlaq_rot90_lane_f32(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float16x8_t -simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, - simde_float16x8_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_float32x4_private r_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), - a_low = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), - r_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), - a_high = - simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), - b_ = simde_float32x4_to_private( - 
simde_vcvt_f32_f16(simde_vdup_n_f16( - simde_float16x8_to_private(b).values[lane]))); - #if defined(SIMDE_SHUFFLE_VECTOR_) && \ - ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || \ - (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) +simde_float16x8_t simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) +{ + simde_float32x4_private r_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(r))), + a_low = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_low_f16(a))), + r_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(r))), + a_high = simde_float32x4_to_private(simde_vcvt_f32_f16(simde_vget_high_f16(a))), + b_ = simde_float32x4_to_private( + simde_vcvt_f32_f16(simde_vdup_n_f16(simde_float16x8_to_private(b).values[lane]))); + #if defined(SIMDE_SHUFFLE_VECTOR_) && \ + ((SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) || (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16)) a_low.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_low.values, a_low.values, 1, 1, 3, 3); a_high.values = SIMDE_SHUFFLE_VECTOR_(16, 4, a_high.values, a_high.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(16, 4, -b_.values, b_.values, 1, 4, 3, 6); @@ -291,8 +255,8 @@ simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, r_high.values += b_.values * a_high.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); - i++) { + for (size_t i = 0; i < (sizeof(r_low.values) / (2 * sizeof(r_low.values[0]))); i++) + { r_low.values[2 * i] += -(b_.values[2 * i + 1]) * a_low.values[2 * i + 1]; r_low.values[2 * i + 1] += b_.values[2 * i] * a_low.values[2 * i + 1]; r_high.values[2 * i] += -(b_.values[2 * i + 1]) * a_high.values[2 * i + 1]; @@ -300,49 +264,44 @@ simde_vcmlaq_rot90_laneq_f16(simde_float16x8_t r, simde_float16x8_t a, } #endif return 
simde_vcombine_f16(simde_vcvt_f16_f32(simde_float32x4_from_private(r_low)), - simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); + simde_vcvt_f16_f32(simde_float32x4_from_private(r_high))); } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot90_laneq_f16 #define vcmlaq_rot90_laneq_f16(r, a, b, lane) simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot90_laneq_f16(r, a, b, lane) vcmlaq_rot90_laneq_f16(r, a, b, lane) #endif SIMDE_FUNCTION_ATTRIBUTES -simde_float32x4_t -simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, - simde_float32x4_t b, const int lane) - SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_float32x4_private r_ = simde_float32x4_to_private(r), - a_ = simde_float32x4_to_private(a), - b_ = simde_float32x4_to_private(simde_vdupq_n_f32( - simde_float32x4_to_private(b).values[lane])); - +simde_float32x4_t simde_vcmlaq_rot90_laneq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) +{ + simde_float32x4_private r_ = simde_float32x4_to_private(r), a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(simde_vdupq_n_f32(simde_float32x4_to_private(b).values[lane])); #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); r_.values += b_.values * a_.values; #else SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); - i++) { + for 
(size_t i = 0; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))); i++) + { r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; } #endif return simde_float32x4_from_private(r_); - } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcmlaq_rot90_laneq_f32 #define vcmlaq_rot90_laneq_f32(r, a, b, lane) simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) #endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8, 3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9, 0, 0)) && \ (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)) #define simde_vcmlaq_rot90_laneq_f32(r, a, b, lane) vcmlaq_rot90_laneq_f32(r, a, b, lane) #endif diff --git a/test/arm/neon/cadd_rot270.c b/test/arm/neon/cadd_rot270.c index b3262e138..06a7f9234 100644 --- a/test/arm/neon/cadd_rot270.c +++ b/test/arm/neon/cadd_rot270.c @@ -44,8 +44,6 @@ test_simde_vcadd_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - - return 0; #else fputc('\n', stdout); @@ -126,8 +124,6 @@ test_simde_vcaddq_rot270_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16x8_t r = simde_vcaddq_rot270_f16(a, b); simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } - - return 0; #else fputc('\n', stdout); @@ -185,8 +181,6 @@ test_simde_vcadd_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - - return 0; #else fputc('\n', stdout); @@ -203,7 +197,7 @@ test_simde_vcadd_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } -static int +static int test_simde_vcaddq_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { @@ -243,7 +237,6 @@ test_simde_vcaddq_rot270_f32 (SIMDE_MUNIT_TEST_ARGS) { 
simde_float32x4_t r = simde_vcaddq_rot270_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); @@ -301,7 +294,6 @@ test_simde_vcaddq_rot270_f64 (SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); diff --git a/test/arm/neon/cadd_rot90.c b/test/arm/neon/cadd_rot90.c index 31890d427..04deaedf8 100644 --- a/test/arm/neon/cadd_rot90.c +++ b/test/arm/neon/cadd_rot90.c @@ -1,77 +1,55 @@ #define SIMDE_TEST_ARM_NEON_INSN cadd_rot90 -#include "../../../simde/arm/neon/cadd_rot90.h" - #include "test-neon.h" +#include "../../../simde/arm/neon/cadd_rot90.h" -static int test_simde_vcadd_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcadd_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), - SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, - {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), - SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, - {SIMDE_FLOAT16_VALUE(547.00), SIMDE_FLOAT16_VALUE(585.00), - SIMDE_FLOAT16_VALUE(166.25), SIMDE_FLOAT16_VALUE(660.00)}}, - {{SIMDE_FLOAT16_VALUE(-659.50), SIMDE_FLOAT16_VALUE(924.50), - SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00)}, - {SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - {SIMDE_FLOAT16_VALUE(-762.50), SIMDE_FLOAT16_VALUE(1654.00), - SIMDE_FLOAT16_VALUE(-414.50), SIMDE_FLOAT16_VALUE(-1138.00)}}, - {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - {SIMDE_FLOAT16_VALUE(131.62), 
SIMDE_FLOAT16_VALUE(-120.38), - SIMDE_FLOAT16_VALUE(-208.00), SIMDE_FLOAT16_VALUE(838.00)}}, - {{SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(-582.50), - SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25)}, - {SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), - SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, - {SIMDE_FLOAT16_VALUE(-1640.00), SIMDE_FLOAT16_VALUE(330.50), - SIMDE_FLOAT16_VALUE(1263.00), SIMDE_FLOAT16_VALUE(-94.00)}}, - {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), - SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, - {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), - SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, - {SIMDE_FLOAT16_VALUE(-843.00), SIMDE_FLOAT16_VALUE(238.12), - SIMDE_FLOAT16_VALUE(945.00), SIMDE_FLOAT16_VALUE(-988.00)}}, - {{SIMDE_FLOAT16_VALUE(498.50), SIMDE_FLOAT16_VALUE(205.75), - SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50)}, - {SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), - SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, - {SIMDE_FLOAT16_VALUE(806.50), SIMDE_FLOAT16_VALUE(-629.00), - SIMDE_FLOAT16_VALUE(-550.50), SIMDE_FLOAT16_VALUE(67.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(755.00), SIMDE_FLOAT16_VALUE(-288.75), - SIMDE_FLOAT16_VALUE(263.00), SIMDE_FLOAT16_VALUE(-1076.00)}}, - {{SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, - {SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, - {SIMDE_FLOAT16_VALUE(-744.50), SIMDE_FLOAT16_VALUE(-790.00), - SIMDE_FLOAT16_VALUE(-456.50), SIMDE_FLOAT16_VALUE(1250.00)}}}; + { { SIMDE_FLOAT16_VALUE( 
816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 547.00), SIMDE_FLOAT16_VALUE( 585.00), SIMDE_FLOAT16_VALUE( 166.25), SIMDE_FLOAT16_VALUE( 660.00) } }, + { { SIMDE_FLOAT16_VALUE( -659.50), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00) }, + { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( -762.50), SIMDE_FLOAT16_VALUE( 1654.00), SIMDE_FLOAT16_VALUE( -414.50), SIMDE_FLOAT16_VALUE( -1138.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( 131.62), SIMDE_FLOAT16_VALUE( -120.38), SIMDE_FLOAT16_VALUE( -208.00), SIMDE_FLOAT16_VALUE( 838.00) } }, + { { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( -582.50), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25) }, + { SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + { SIMDE_FLOAT16_VALUE( -1640.00), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( 1263.00), SIMDE_FLOAT16_VALUE( -94.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -843.00), SIMDE_FLOAT16_VALUE( 238.12), SIMDE_FLOAT16_VALUE( 945.00), SIMDE_FLOAT16_VALUE( -988.00) } }, + { { SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( 
-797.50), SIMDE_FLOAT16_VALUE( 741.50) }, + { SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + { SIMDE_FLOAT16_VALUE( 806.50), SIMDE_FLOAT16_VALUE( -629.00), SIMDE_FLOAT16_VALUE( -550.50), SIMDE_FLOAT16_VALUE( 67.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( 755.00), SIMDE_FLOAT16_VALUE( -288.75), SIMDE_FLOAT16_VALUE( 263.00), SIMDE_FLOAT16_VALUE( -1076.00) } }, + { { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + { SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + { SIMDE_FLOAT16_VALUE( -744.50), SIMDE_FLOAT16_VALUE( -790.00), SIMDE_FLOAT16_VALUE( -456.50), SIMDE_FLOAT16_VALUE( 1250.00) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r = simde_vcadd_rot90_f16(a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } + return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcadd_rot90_f16(a, b); @@ -84,123 +62,75 @@ static int test_simde_vcadd_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcaddq_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcaddq_rot90_f16 (SIMDE_MUNIT_TEST_ARGS) { 
#if 1 struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), - SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75), - SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, - {SIMDE_FLOAT16_VALUE(-936.50), SIMDE_FLOAT16_VALUE(-465.00), - SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), - SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), - SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, - {SIMDE_FLOAT16_VALUE(1005.000000), SIMDE_FLOAT16_VALUE(-1864.000000), - SIMDE_FLOAT16_VALUE(366.000000), SIMDE_FLOAT16_VALUE(236.750000), - SIMDE_FLOAT16_VALUE(-416.000000), SIMDE_FLOAT16_VALUE(-204.625000), - SIMDE_FLOAT16_VALUE(1710.000000), SIMDE_FLOAT16_VALUE(-320.000000)}}, - {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), - SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00), - SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), - SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, - {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(-666.00), - SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), - SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), - SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, - {SIMDE_FLOAT16_VALUE(917.000000), SIMDE_FLOAT16_VALUE(1107.000000), - SIMDE_FLOAT16_VALUE(-444.750000), SIMDE_FLOAT16_VALUE(463.500000), - SIMDE_FLOAT16_VALUE(444.000000), SIMDE_FLOAT16_VALUE(-801.000000), - SIMDE_FLOAT16_VALUE(126.000000), SIMDE_FLOAT16_VALUE(-385.000000)}}, - {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), - SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50), - SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), - SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, - {SIMDE_FLOAT16_VALUE(-111.25), SIMDE_FLOAT16_VALUE(-830.50), - SIMDE_FLOAT16_VALUE(59.75), 
SIMDE_FLOAT16_VALUE(970.50), - SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), - SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, - {SIMDE_FLOAT16_VALUE(913.500000), SIMDE_FLOAT16_VALUE(306.750000), - SIMDE_FLOAT16_VALUE(-1846.000000), SIMDE_FLOAT16_VALUE(890.000000), - SIMDE_FLOAT16_VALUE(189.000000), SIMDE_FLOAT16_VALUE(1354.000000), - SIMDE_FLOAT16_VALUE(-197.500000), SIMDE_FLOAT16_VALUE(1061.000000)}}, - {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), - SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50), - SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), - SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, - {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-677.50), - SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), - SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), - SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, - {SIMDE_FLOAT16_VALUE(1326.000000), SIMDE_FLOAT16_VALUE(717.000000), - SIMDE_FLOAT16_VALUE(531.500000), SIMDE_FLOAT16_VALUE(-696.000000), - SIMDE_FLOAT16_VALUE(-1210.000000), SIMDE_FLOAT16_VALUE(484.000000), - SIMDE_FLOAT16_VALUE(-598.000000), SIMDE_FLOAT16_VALUE(-657.000000)}}, - {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), - SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), - SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, - {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), - SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), - SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), - SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, - {SIMDE_FLOAT16_VALUE(891.500000), SIMDE_FLOAT16_VALUE(1069.000000), - SIMDE_FLOAT16_VALUE(-9.125000), SIMDE_FLOAT16_VALUE(-197.500000), - SIMDE_FLOAT16_VALUE(-370.000000), SIMDE_FLOAT16_VALUE(67.000000), - SIMDE_FLOAT16_VALUE(71.750000), SIMDE_FLOAT16_VALUE(-198.750000)}}, - 
{{SIMDE_FLOAT16_VALUE(-378.00), SIMDE_FLOAT16_VALUE(-695.50), - SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), - SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, - {SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25), - SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-640.00), - SIMDE_FLOAT16_VALUE(-552.00), SIMDE_FLOAT16_VALUE(75.88)}, - {SIMDE_FLOAT16_VALUE(596.000000), SIMDE_FLOAT16_VALUE(-1482.000000), - SIMDE_FLOAT16_VALUE(-247.750000), SIMDE_FLOAT16_VALUE(-649.000000), - SIMDE_FLOAT16_VALUE(-229.500000), SIMDE_FLOAT16_VALUE(-662.000000), - SIMDE_FLOAT16_VALUE(381.500000), SIMDE_FLOAT16_VALUE(27.000000)}}, - {{SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), - SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(943.50), - SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(395.50), - SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, - {SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), - SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), - SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), - SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, - {SIMDE_FLOAT16_VALUE(512.000000), SIMDE_FLOAT16_VALUE(-649.000000), - SIMDE_FLOAT16_VALUE(1338.000000), SIMDE_FLOAT16_VALUE(566.500000), - SIMDE_FLOAT16_VALUE(-1637.000000), SIMDE_FLOAT16_VALUE(1226.000000), - SIMDE_FLOAT16_VALUE(-992.000000), SIMDE_FLOAT16_VALUE(-1181.000000)}}, - {{SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), - SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), - SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), - SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, - {SIMDE_FLOAT16_VALUE(274.50), SIMDE_FLOAT16_VALUE(192.38), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25), - SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), - 
SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50)}, - {SIMDE_FLOAT16_VALUE(343.000000), SIMDE_FLOAT16_VALUE(896.000000), - SIMDE_FLOAT16_VALUE(76.250000), SIMDE_FLOAT16_VALUE(1110.000000), - SIMDE_FLOAT16_VALUE(-503.750000), SIMDE_FLOAT16_VALUE(-938.000000), - SIMDE_FLOAT16_VALUE(-1546.000000), SIMDE_FLOAT16_VALUE(-348.000000)}}}; + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75), + SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -936.50), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + { SIMDE_FLOAT16_VALUE(1005.000000), SIMDE_FLOAT16_VALUE(-1864.000000), SIMDE_FLOAT16_VALUE(366.000000), SIMDE_FLOAT16_VALUE(236.750000), + SIMDE_FLOAT16_VALUE(-416.000000), SIMDE_FLOAT16_VALUE(-204.625000), SIMDE_FLOAT16_VALUE(1710.000000), SIMDE_FLOAT16_VALUE(-320.000000) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00), + SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( -666.00), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + { SIMDE_FLOAT16_VALUE(917.000000), SIMDE_FLOAT16_VALUE(1107.000000), SIMDE_FLOAT16_VALUE(-444.750000), SIMDE_FLOAT16_VALUE(463.500000), + SIMDE_FLOAT16_VALUE(444.000000), SIMDE_FLOAT16_VALUE(-801.000000), SIMDE_FLOAT16_VALUE(126.000000), SIMDE_FLOAT16_VALUE(-385.000000) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( 
-875.50), SIMDE_FLOAT16_VALUE( 830.50), + SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -111.25), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + { SIMDE_FLOAT16_VALUE(913.500000), SIMDE_FLOAT16_VALUE(306.750000), SIMDE_FLOAT16_VALUE(-1846.000000), SIMDE_FLOAT16_VALUE(890.000000), + SIMDE_FLOAT16_VALUE(189.000000), SIMDE_FLOAT16_VALUE(1354.000000), SIMDE_FLOAT16_VALUE(-197.500000), SIMDE_FLOAT16_VALUE(1061.000000) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50), + SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -677.50), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + { SIMDE_FLOAT16_VALUE(1326.000000), SIMDE_FLOAT16_VALUE(717.000000), SIMDE_FLOAT16_VALUE(531.500000), SIMDE_FLOAT16_VALUE(-696.000000), + SIMDE_FLOAT16_VALUE(-1210.000000), SIMDE_FLOAT16_VALUE(484.000000), SIMDE_FLOAT16_VALUE(-598.000000), SIMDE_FLOAT16_VALUE(-657.000000) } }, + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { 
SIMDE_FLOAT16_VALUE(891.500000), SIMDE_FLOAT16_VALUE(1069.000000), SIMDE_FLOAT16_VALUE(-9.125000), SIMDE_FLOAT16_VALUE(-197.500000), + SIMDE_FLOAT16_VALUE(-370.000000), SIMDE_FLOAT16_VALUE(67.000000), SIMDE_FLOAT16_VALUE(71.750000), SIMDE_FLOAT16_VALUE(-198.750000) } }, + { { SIMDE_FLOAT16_VALUE( -378.00), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), + SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25), + SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88) }, + { SIMDE_FLOAT16_VALUE(596.000000), SIMDE_FLOAT16_VALUE(-1482.000000), SIMDE_FLOAT16_VALUE(-247.750000), SIMDE_FLOAT16_VALUE(-649.000000), + SIMDE_FLOAT16_VALUE(-229.500000), SIMDE_FLOAT16_VALUE(-662.000000), SIMDE_FLOAT16_VALUE(381.500000), SIMDE_FLOAT16_VALUE(27.000000) } }, + { { SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50), + SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 395.50), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE(512.000000), SIMDE_FLOAT16_VALUE(-649.000000), SIMDE_FLOAT16_VALUE(1338.000000), SIMDE_FLOAT16_VALUE(566.500000), + SIMDE_FLOAT16_VALUE(-1637.000000), SIMDE_FLOAT16_VALUE(1226.000000), SIMDE_FLOAT16_VALUE(-992.000000), SIMDE_FLOAT16_VALUE(-1181.000000) } }, + { { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( 
-574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 274.50), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25), + SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50) }, + { SIMDE_FLOAT16_VALUE(343.000000), SIMDE_FLOAT16_VALUE(896.000000), SIMDE_FLOAT16_VALUE(76.250000), SIMDE_FLOAT16_VALUE(1110.000000), + SIMDE_FLOAT16_VALUE(-503.750000), SIMDE_FLOAT16_VALUE(-938.000000), SIMDE_FLOAT16_VALUE(-1546.000000), SIMDE_FLOAT16_VALUE(-348.000000) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r = simde_vcaddq_rot90_f16(a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcaddq_rot90_f16(a, b); @@ -213,50 +143,51 @@ static int test_simde_vcaddq_rot90_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcadd_rot90_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcadd_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(863.66), SIMDE_FLOAT32_C(828.31)}, - {SIMDE_FLOAT32_C(-563.51), SIMDE_FLOAT32_C(-576.51)}, - {SIMDE_FLOAT32_C(1440.169922), SIMDE_FLOAT32_C(264.799988)}}, - {{SIMDE_FLOAT32_C(-703.45), SIMDE_FLOAT32_C(383.90)}, - 
{SIMDE_FLOAT32_C(-772.46), SIMDE_FLOAT32_C(457.40)}, - {SIMDE_FLOAT32_C(-1160.849976), SIMDE_FLOAT32_C(-388.560028)}}, - {{SIMDE_FLOAT32_C(295.99), SIMDE_FLOAT32_C(653.10)}, - {SIMDE_FLOAT32_C(-120.98), SIMDE_FLOAT32_C(945.50)}, - {SIMDE_FLOAT32_C(-649.510010), SIMDE_FLOAT32_C(532.119995)}}, - {{SIMDE_FLOAT32_C(-280.81), SIMDE_FLOAT32_C(631.32)}, - {SIMDE_FLOAT32_C(688.34), SIMDE_FLOAT32_C(191.95)}, - {SIMDE_FLOAT32_C(-472.760010), SIMDE_FLOAT32_C(1319.660034)}}, - {{SIMDE_FLOAT32_C(-522.88), SIMDE_FLOAT32_C(-323.79)}, - {SIMDE_FLOAT32_C(-887.99), SIMDE_FLOAT32_C(-283.70)}, - {SIMDE_FLOAT32_C(-239.179993), SIMDE_FLOAT32_C(-1211.780029)}}, - {{SIMDE_FLOAT32_C(-117.76), SIMDE_FLOAT32_C(-841.45)}, - {SIMDE_FLOAT32_C(664.94), SIMDE_FLOAT32_C(-987.19)}, - {SIMDE_FLOAT32_C(869.429993), SIMDE_FLOAT32_C(-176.510010)}}, - {{SIMDE_FLOAT32_C(-642.89), SIMDE_FLOAT32_C(-152.10)}, - {SIMDE_FLOAT32_C(963.83), SIMDE_FLOAT32_C(919.89)}, - {SIMDE_FLOAT32_C(-1562.780029), SIMDE_FLOAT32_C(811.729980)}}, - {{SIMDE_FLOAT32_C(630.40), SIMDE_FLOAT32_C(-669.33)}, - {SIMDE_FLOAT32_C(671.13), SIMDE_FLOAT32_C(256.93)}, - {SIMDE_FLOAT32_C(373.470032), SIMDE_FLOAT32_C(1.799988)}}}; + { { SIMDE_FLOAT32_C( 863.66), SIMDE_FLOAT32_C( 828.31) }, + { SIMDE_FLOAT32_C( -563.51), SIMDE_FLOAT32_C( -576.51) }, + { SIMDE_FLOAT32_C(1440.169922), SIMDE_FLOAT32_C(264.799988) } }, + { { SIMDE_FLOAT32_C( -703.45), SIMDE_FLOAT32_C( 383.90) }, + { SIMDE_FLOAT32_C( -772.46), SIMDE_FLOAT32_C( 457.40) }, + { SIMDE_FLOAT32_C(-1160.849976), SIMDE_FLOAT32_C(-388.560028) } }, + { { SIMDE_FLOAT32_C( 295.99), SIMDE_FLOAT32_C( 653.10) }, + { SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( 945.50) }, + { SIMDE_FLOAT32_C(-649.510010), SIMDE_FLOAT32_C(532.119995) } }, + { { SIMDE_FLOAT32_C( -280.81), SIMDE_FLOAT32_C( 631.32) }, + { SIMDE_FLOAT32_C( 688.34), SIMDE_FLOAT32_C( 191.95) }, + { SIMDE_FLOAT32_C(-472.760010), SIMDE_FLOAT32_C(1319.660034) } }, + { { SIMDE_FLOAT32_C( -522.88), SIMDE_FLOAT32_C( -323.79) }, + { 
SIMDE_FLOAT32_C( -887.99), SIMDE_FLOAT32_C( -283.70) }, + { SIMDE_FLOAT32_C(-239.179993), SIMDE_FLOAT32_C(-1211.780029) } }, + { { SIMDE_FLOAT32_C( -117.76), SIMDE_FLOAT32_C( -841.45) }, + { SIMDE_FLOAT32_C( 664.94), SIMDE_FLOAT32_C( -987.19) }, + { SIMDE_FLOAT32_C(869.429993), SIMDE_FLOAT32_C(-176.510010) } }, + { { SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( -152.10) }, + { SIMDE_FLOAT32_C( 963.83), SIMDE_FLOAT32_C( 919.89) }, + { SIMDE_FLOAT32_C(-1562.780029), SIMDE_FLOAT32_C(811.729980) } }, + { { SIMDE_FLOAT32_C( 630.40), SIMDE_FLOAT32_C( -669.33) }, + { SIMDE_FLOAT32_C( 671.13), SIMDE_FLOAT32_C( 256.93) }, + { SIMDE_FLOAT32_C(373.470032), SIMDE_FLOAT32_C( 1.799988) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcadd_rot90_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcadd_rot90_f32(a, b); @@ -269,74 +200,50 @@ static int test_simde_vcadd_rot90_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcaddq_rot90_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcaddq_rot90_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(-337.31), SIMDE_FLOAT32_C(-857.36), - SIMDE_FLOAT32_C(334.71), SIMDE_FLOAT32_C(-617.33)}, - {SIMDE_FLOAT32_C(-439.38), SIMDE_FLOAT32_C(245.13), - SIMDE_FLOAT32_C(111.06), SIMDE_FLOAT32_C(520.69)}, - {SIMDE_FLOAT32_C(-582.440002), SIMDE_FLOAT32_C(-1296.739990), - 
SIMDE_FLOAT32_C(-185.980011), SIMDE_FLOAT32_C(-506.270020)}}, - {{SIMDE_FLOAT32_C(85.49), SIMDE_FLOAT32_C(250.19), - SIMDE_FLOAT32_C(-679.96), SIMDE_FLOAT32_C(-750.25)}, - {SIMDE_FLOAT32_C(-138.26), SIMDE_FLOAT32_C(-14.62), - SIMDE_FLOAT32_C(-921.52), SIMDE_FLOAT32_C(225.91)}, - {SIMDE_FLOAT32_C(100.110001), SIMDE_FLOAT32_C(111.930008), - SIMDE_FLOAT32_C(-905.869995), SIMDE_FLOAT32_C(-1671.770020)}}, - {{SIMDE_FLOAT32_C(242.83), SIMDE_FLOAT32_C(869.28), - SIMDE_FLOAT32_C(297.95), SIMDE_FLOAT32_C(105.66)}, - {SIMDE_FLOAT32_C(-722.51), SIMDE_FLOAT32_C(-802.37), - SIMDE_FLOAT32_C(-245.78), SIMDE_FLOAT32_C(915.39)}, - {SIMDE_FLOAT32_C(1045.199951), SIMDE_FLOAT32_C(146.770020), - SIMDE_FLOAT32_C(-617.440002), SIMDE_FLOAT32_C(-140.119995)}}, - {{SIMDE_FLOAT32_C(54.20), SIMDE_FLOAT32_C(-928.06), - SIMDE_FLOAT32_C(362.39), SIMDE_FLOAT32_C(-936.63)}, - {SIMDE_FLOAT32_C(185.82), SIMDE_FLOAT32_C(-244.43), - SIMDE_FLOAT32_C(924.66), SIMDE_FLOAT32_C(-643.82)}, - {SIMDE_FLOAT32_C(298.630005), SIMDE_FLOAT32_C(-742.239990), - SIMDE_FLOAT32_C(1006.210022), SIMDE_FLOAT32_C(-11.970032)}}, - {{SIMDE_FLOAT32_C(-516.92), SIMDE_FLOAT32_C(-615.16), - SIMDE_FLOAT32_C(-751.52), SIMDE_FLOAT32_C(-974.04)}, - {SIMDE_FLOAT32_C(-144.42), SIMDE_FLOAT32_C(338.27), - SIMDE_FLOAT32_C(704.92), SIMDE_FLOAT32_C(116.90)}, - {SIMDE_FLOAT32_C(-855.189941), SIMDE_FLOAT32_C(-759.579956), - SIMDE_FLOAT32_C(-868.420044), SIMDE_FLOAT32_C(-269.119995)}}, - {{SIMDE_FLOAT32_C(49.39), SIMDE_FLOAT32_C(-363.00), - SIMDE_FLOAT32_C(-476.30), SIMDE_FLOAT32_C(106.71)}, - {SIMDE_FLOAT32_C(-725.84), SIMDE_FLOAT32_C(-353.71), - SIMDE_FLOAT32_C(268.41), SIMDE_FLOAT32_C(728.83)}, - {SIMDE_FLOAT32_C(403.099976), SIMDE_FLOAT32_C(-1088.840088), - SIMDE_FLOAT32_C(-1205.130005), SIMDE_FLOAT32_C(375.119995)}}, - {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87), - SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, - {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), - SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, - 
{SIMDE_FLOAT32_C(-1090.540039), SIMDE_FLOAT32_C(-533.799988), - SIMDE_FLOAT32_C(400.039978), SIMDE_FLOAT32_C(-1599.100098)}}, - {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94), - SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, - {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), - SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, - {SIMDE_FLOAT32_C(-353.709991), SIMDE_FLOAT32_C(-1358.580078), - SIMDE_FLOAT32_C(-895.679993), SIMDE_FLOAT32_C(551.410034)}}}; + { { SIMDE_FLOAT32_C( -337.31), SIMDE_FLOAT32_C( -857.36), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( -617.33) }, + { SIMDE_FLOAT32_C( -439.38), SIMDE_FLOAT32_C( 245.13), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( 520.69) }, + { SIMDE_FLOAT32_C(-582.440002), SIMDE_FLOAT32_C(-1296.739990), SIMDE_FLOAT32_C(-185.980011), SIMDE_FLOAT32_C(-506.270020) } }, + { { SIMDE_FLOAT32_C( 85.49), SIMDE_FLOAT32_C( 250.19), SIMDE_FLOAT32_C( -679.96), SIMDE_FLOAT32_C( -750.25) }, + { SIMDE_FLOAT32_C( -138.26), SIMDE_FLOAT32_C( -14.62), SIMDE_FLOAT32_C( -921.52), SIMDE_FLOAT32_C( 225.91) }, + { SIMDE_FLOAT32_C(100.110001), SIMDE_FLOAT32_C(111.930008), SIMDE_FLOAT32_C(-905.869995), SIMDE_FLOAT32_C(-1671.770020) } }, + { { SIMDE_FLOAT32_C( 242.83), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 297.95), SIMDE_FLOAT32_C( 105.66) }, + { SIMDE_FLOAT32_C( -722.51), SIMDE_FLOAT32_C( -802.37), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( 915.39) }, + { SIMDE_FLOAT32_C(1045.199951), SIMDE_FLOAT32_C(146.770020), SIMDE_FLOAT32_C(-617.440002), SIMDE_FLOAT32_C(-140.119995) } }, + { { SIMDE_FLOAT32_C( 54.20), SIMDE_FLOAT32_C( -928.06), SIMDE_FLOAT32_C( 362.39), SIMDE_FLOAT32_C( -936.63) }, + { SIMDE_FLOAT32_C( 185.82), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 924.66), SIMDE_FLOAT32_C( -643.82) }, + { SIMDE_FLOAT32_C(298.630005), SIMDE_FLOAT32_C(-742.239990), SIMDE_FLOAT32_C(1006.210022), SIMDE_FLOAT32_C(-11.970032) } }, + { { SIMDE_FLOAT32_C( -516.92), SIMDE_FLOAT32_C( -615.16), SIMDE_FLOAT32_C( -751.52), SIMDE_FLOAT32_C( -974.04) }, + 
{ SIMDE_FLOAT32_C( -144.42), SIMDE_FLOAT32_C( 338.27), SIMDE_FLOAT32_C( 704.92), SIMDE_FLOAT32_C( 116.90) }, + { SIMDE_FLOAT32_C(-855.189941), SIMDE_FLOAT32_C(-759.579956), SIMDE_FLOAT32_C(-868.420044), SIMDE_FLOAT32_C(-269.119995) } }, + { { SIMDE_FLOAT32_C( 49.39), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -476.30), SIMDE_FLOAT32_C( 106.71) }, + { SIMDE_FLOAT32_C( -725.84), SIMDE_FLOAT32_C( -353.71), SIMDE_FLOAT32_C( 268.41), SIMDE_FLOAT32_C( 728.83) }, + { SIMDE_FLOAT32_C(403.099976), SIMDE_FLOAT32_C(-1088.840088), SIMDE_FLOAT32_C(-1205.130005), SIMDE_FLOAT32_C(375.119995) } }, + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87), SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + { SIMDE_FLOAT32_C(-1090.540039), SIMDE_FLOAT32_C(-533.799988), SIMDE_FLOAT32_C(400.039978), SIMDE_FLOAT32_C(-1599.100098) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + { SIMDE_FLOAT32_C(-353.709991), SIMDE_FLOAT32_C(-1358.580078), SIMDE_FLOAT32_C(-895.679993), SIMDE_FLOAT32_C(551.410034) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vcaddq_rot90_f32(a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = 
simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcaddq_rot90_f32(a, b); @@ -349,50 +256,50 @@ static int test_simde_vcaddq_rot90_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcaddq_rot90_f64(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcaddq_rot90_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { - {{SIMDE_FLOAT64_C(-30.36), SIMDE_FLOAT64_C(631.53)}, - {SIMDE_FLOAT64_C(850.75), SIMDE_FLOAT64_C(-263.55)}, - {SIMDE_FLOAT64_C(233.190000), SIMDE_FLOAT64_C(1482.280000)}}, - {{SIMDE_FLOAT64_C(139.96), SIMDE_FLOAT64_C(859.14)}, - {SIMDE_FLOAT64_C(-834.47), SIMDE_FLOAT64_C(216.10)}, - {SIMDE_FLOAT64_C(-76.140000), SIMDE_FLOAT64_C(24.670000)}}, - {{SIMDE_FLOAT64_C(995.86), SIMDE_FLOAT64_C(529.74)}, - {SIMDE_FLOAT64_C(79.08), SIMDE_FLOAT64_C(947.13)}, - {SIMDE_FLOAT64_C(48.730000), SIMDE_FLOAT64_C(608.820000)}}, - {{SIMDE_FLOAT64_C(122.02), SIMDE_FLOAT64_C(-250.00)}, - {SIMDE_FLOAT64_C(-361.82), SIMDE_FLOAT64_C(265.24)}, - {SIMDE_FLOAT64_C(-143.220000), SIMDE_FLOAT64_C(-611.820000)}}, - {{SIMDE_FLOAT64_C(275.71), SIMDE_FLOAT64_C(2.71)}, - {SIMDE_FLOAT64_C(99.79), SIMDE_FLOAT64_C(-137.67)}, - {SIMDE_FLOAT64_C(413.380000), SIMDE_FLOAT64_C(102.500000)}}, - {{SIMDE_FLOAT64_C(-761.19), SIMDE_FLOAT64_C(813.19)}, - {SIMDE_FLOAT64_C(-897.68), SIMDE_FLOAT64_C(653.58)}, - {SIMDE_FLOAT64_C(-1414.770000), SIMDE_FLOAT64_C(-84.490000)}}, - {{SIMDE_FLOAT64_C(396.02), SIMDE_FLOAT64_C(413.06)}, - {SIMDE_FLOAT64_C(514.09), SIMDE_FLOAT64_C(-977.67)}, - {SIMDE_FLOAT64_C(1373.690000), SIMDE_FLOAT64_C(927.150000)}}, - {{SIMDE_FLOAT64_C(-671.79), SIMDE_FLOAT64_C(-92.13)}, - {SIMDE_FLOAT64_C(-441.32), SIMDE_FLOAT64_C(-374.27)}, - {SIMDE_FLOAT64_C(-297.520000), SIMDE_FLOAT64_C(-533.450000)}}}; + { { SIMDE_FLOAT64_C( -30.36), SIMDE_FLOAT64_C( 631.53) }, + { SIMDE_FLOAT64_C( 850.75), SIMDE_FLOAT64_C( -263.55) }, + { SIMDE_FLOAT64_C(233.190000), 
SIMDE_FLOAT64_C(1482.280000) } }, + { { SIMDE_FLOAT64_C( 139.96), SIMDE_FLOAT64_C( 859.14) }, + { SIMDE_FLOAT64_C( -834.47), SIMDE_FLOAT64_C( 216.10) }, + { SIMDE_FLOAT64_C(-76.140000), SIMDE_FLOAT64_C(24.670000) } }, + { { SIMDE_FLOAT64_C( 995.86), SIMDE_FLOAT64_C( 529.74) }, + { SIMDE_FLOAT64_C( 79.08), SIMDE_FLOAT64_C( 947.13) }, + { SIMDE_FLOAT64_C(48.730000), SIMDE_FLOAT64_C(608.820000) } }, + { { SIMDE_FLOAT64_C( 122.02), SIMDE_FLOAT64_C( -250.00) }, + { SIMDE_FLOAT64_C( -361.82), SIMDE_FLOAT64_C( 265.24) }, + { SIMDE_FLOAT64_C(-143.220000), SIMDE_FLOAT64_C(-611.820000) } }, + { { SIMDE_FLOAT64_C( 275.71), SIMDE_FLOAT64_C( 2.71) }, + { SIMDE_FLOAT64_C( 99.79), SIMDE_FLOAT64_C( -137.67) }, + { SIMDE_FLOAT64_C(413.380000), SIMDE_FLOAT64_C(102.500000) } }, + { { SIMDE_FLOAT64_C( -761.19), SIMDE_FLOAT64_C( 813.19) }, + { SIMDE_FLOAT64_C( -897.68), SIMDE_FLOAT64_C( 653.58) }, + { SIMDE_FLOAT64_C(-1414.770000), SIMDE_FLOAT64_C(-84.490000) } }, + { { SIMDE_FLOAT64_C( 396.02), SIMDE_FLOAT64_C( 413.06) }, + { SIMDE_FLOAT64_C( 514.09), SIMDE_FLOAT64_C( -977.67) }, + { SIMDE_FLOAT64_C(1373.690000), SIMDE_FLOAT64_C(927.150000) } }, + { { SIMDE_FLOAT64_C( -671.79), SIMDE_FLOAT64_C( -92.13) }, + { SIMDE_FLOAT64_C( -441.32), SIMDE_FLOAT64_C( -374.27) }, + { SIMDE_FLOAT64_C(-297.520000), SIMDE_FLOAT64_C(-533.450000) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vcaddq_rot90_f64(a, b); - simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - for (int i = 0; i < 8; i++) { + for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); 
simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t r = simde_vcaddq_rot90_f64(a, b); diff --git a/test/arm/neon/cmla_lane.c b/test/arm/neon/cmla_lane.c index 0530e8322..13e97a520 100644 --- a/test/arm/neon/cmla_lane.c +++ b/test/arm/neon/cmla_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_lane +#include "test-neon.h" #include "../../../simde/arm/neon/cmla_lane.h" - #include "../../../simde/arm/neon/dup_n.h" -#include "test-neon.h" -static int test_simde_vcmla_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -14,106 +14,71 @@ static int test_simde_vcmla_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), - SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, - {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), - SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-7724.000000), SIMDE_FLOAT16_VALUE(-7784.000000), - SIMDE_FLOAT16_VALUE(-10416.000000), - SIMDE_FLOAT16_VALUE(-10352.000000)}}, - {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(48000.000000), SIMDE_FLOAT16_VALUE(47392.000000), - SIMDE_FLOAT16_VALUE(-22592.000000), - SIMDE_FLOAT16_VALUE(-21312.000000)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - 
{SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-51488.000000), SIMDE_FLOAT16_VALUE(-51680.000000), - SIMDE_FLOAT16_VALUE(48192.000000), SIMDE_FLOAT16_VALUE(46528.000000)}}, - {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), - SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, - {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), - SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), - SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-37536.000000), SIMDE_FLOAT16_VALUE(-37824.000000), - SIMDE_FLOAT16_VALUE(23584.000000), SIMDE_FLOAT16_VALUE(23552.000000)}}, - {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), - SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, - {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), - SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, - {SIMDE_FLOAT16_VALUE(-180.50), SIMDE_FLOAT16_VALUE(375.50), - SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-34432.000000), SIMDE_FLOAT16_VALUE(-34624.000000), - SIMDE_FLOAT16_VALUE(-51520.000000), - SIMDE_FLOAT16_VALUE(-51392.000000)}}, - {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, - {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, - {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), - SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(243.125000), SIMDE_FLOAT16_VALUE(199.000000), - SIMDE_FLOAT16_VALUE(48928.000000), SIMDE_FLOAT16_VALUE(49248.000000)}}, 
- {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, - {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-35520.000000), SIMDE_FLOAT16_VALUE(-35968.000000), - SIMDE_FLOAT16_VALUE(-9888.000000), SIMDE_FLOAT16_VALUE(-9928.000000)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, - {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, - {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), - SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-18624.000000), SIMDE_FLOAT16_VALUE(-18464.000000), - SIMDE_FLOAT16_VALUE(-13800.000000), SIMDE_FLOAT16_VALUE(-13680.000000)}} - + { + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-7724.000000), SIMDE_FLOAT16_VALUE(-7784.000000), SIMDE_FLOAT16_VALUE(-10416.000000), SIMDE_FLOAT16_VALUE(-10352.000000) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + 
INT32_C( 1), + { SIMDE_FLOAT16_VALUE(48000.000000), SIMDE_FLOAT16_VALUE(47392.000000), SIMDE_FLOAT16_VALUE(-22592.000000), SIMDE_FLOAT16_VALUE(-21312.000000) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-51488.000000), SIMDE_FLOAT16_VALUE(-51680.000000), SIMDE_FLOAT16_VALUE(48192.000000), SIMDE_FLOAT16_VALUE(46528.000000) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-37536.000000), SIMDE_FLOAT16_VALUE(-37824.000000), SIMDE_FLOAT16_VALUE(23584.000000), SIMDE_FLOAT16_VALUE(23552.000000) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-34432.000000), SIMDE_FLOAT16_VALUE(-34624.000000), SIMDE_FLOAT16_VALUE(-51520.000000), SIMDE_FLOAT16_VALUE(-51392.000000) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), 
SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(243.125000), SIMDE_FLOAT16_VALUE(199.000000), SIMDE_FLOAT16_VALUE(48928.000000), SIMDE_FLOAT16_VALUE(49248.000000) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-35520.000000), SIMDE_FLOAT16_VALUE(-35968.000000), SIMDE_FLOAT16_VALUE(-9888.000000), SIMDE_FLOAT16_VALUE(-9928.000000) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-18624.000000), SIMDE_FLOAT16_VALUE(-18464.000000), SIMDE_FLOAT16_VALUE(-13800.000000), SIMDE_FLOAT16_VALUE(-13680.000000) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_lane_f16, r, (HEDLEY_UNREACHABLE(), 
simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -124,7 +89,8 @@ static int test_simde_vcmla_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -133,65 +99,66 @@ static int test_simde_vcmla_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, - {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, - {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, - INT32_C(0), - {SIMDE_FLOAT32_C(554878.125000), SIMDE_FLOAT32_C(555212.812500)}}, - {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, - {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, - {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-7536.677246), SIMDE_FLOAT32_C(-5996.586914)}}, - {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, - {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, - {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, - INT32_C(0), - {SIMDE_FLOAT32_C(267362.687500), SIMDE_FLOAT32_C(266017.968750)}}, - 
{{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, - {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, - {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-169232.828125), SIMDE_FLOAT32_C(-170505.734375)}}, - {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, - {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, - {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, - INT32_C(0), - {SIMDE_FLOAT32_C(23602.720703), SIMDE_FLOAT32_C(22593.902344)}}, - {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, - {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, - {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, - INT32_C(0), - {SIMDE_FLOAT32_C(40592.113281), SIMDE_FLOAT32_C(41962.363281)}}, - {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, - {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, - {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-346414.437500), SIMDE_FLOAT32_C(-345866.750000)}}, - {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, - {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, - {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-120500.015625), SIMDE_FLOAT32_C(-119691.234375)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(554878.125000), SIMDE_FLOAT32_C(555212.812500) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-7536.677246), SIMDE_FLOAT32_C(-5996.586914) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), 
SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(267362.687500), SIMDE_FLOAT32_C(266017.968750) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-169232.828125), SIMDE_FLOAT32_C(-170505.734375) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(23602.720703), SIMDE_FLOAT32_C(22593.902344) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(40592.113281), SIMDE_FLOAT32_C(41962.363281) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-346414.437500), SIMDE_FLOAT32_C(-345866.750000) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-120500.015625), SIMDE_FLOAT32_C(-119691.234375) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for 
(int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -202,7 +169,9 @@ static int test_simde_vcmla_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { + +static int +test_simde_vcmla_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -211,120 +180,76 @@ static int test_simde_vcmla_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), - SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, - {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), - SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), - SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), - SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-7264.000000), SIMDE_FLOAT16_VALUE(-7308.000000), - SIMDE_FLOAT16_VALUE(4584.000000), SIMDE_FLOAT16_VALUE(5504.000000)}}, - {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), - SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), - SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), - 
SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(87.312500), SIMDE_FLOAT16_VALUE(-319.750000), - SIMDE_FLOAT16_VALUE(3616.000000), SIMDE_FLOAT16_VALUE(4476.000000)}}, - {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), - SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, - {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), - SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, - {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), - SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), - SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), - SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-5340.000000), SIMDE_FLOAT16_VALUE(-5256.000000), - SIMDE_FLOAT16_VALUE(10224.000000), SIMDE_FLOAT16_VALUE(9984.000000)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), - SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(26144.000000), SIMDE_FLOAT16_VALUE(25920.000000), - SIMDE_FLOAT16_VALUE(-23680.000000), - SIMDE_FLOAT16_VALUE(-25360.000000)}}, - {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), - SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, - {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), - SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), - SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), - SIMDE_FLOAT16_VALUE(-209.25), 
SIMDE_FLOAT16_VALUE(-856.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(21776.000000), SIMDE_FLOAT16_VALUE(20304.000000), - SIMDE_FLOAT16_VALUE(-45568.000000), - SIMDE_FLOAT16_VALUE(-45248.000000)}}, - {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), - SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, - {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), - SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, - {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), - SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), - SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), - SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(48256.000000), SIMDE_FLOAT16_VALUE(48544.000000), - SIMDE_FLOAT16_VALUE(32704.000000), SIMDE_FLOAT16_VALUE(32352.000000)}}, - {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), - SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, - {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), - SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, - {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), - SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), - SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), - SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(19840.000000), SIMDE_FLOAT16_VALUE(20176.000000), - SIMDE_FLOAT16_VALUE(26880.000000), SIMDE_FLOAT16_VALUE(28592.000000)}}, - {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), - SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, - {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), - SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, - {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), - SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), - SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, - 
INT32_C(1), - {SIMDE_FLOAT16_VALUE(56384.000000), SIMDE_FLOAT16_VALUE(56096.000000), - SIMDE_FLOAT16_VALUE(-16768.000000), - SIMDE_FLOAT16_VALUE(-18048.000000)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-7264.000000), SIMDE_FLOAT16_VALUE(-7308.000000), SIMDE_FLOAT16_VALUE(4584.000000), SIMDE_FLOAT16_VALUE(5504.000000) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(87.312500), SIMDE_FLOAT16_VALUE(-319.750000), SIMDE_FLOAT16_VALUE(3616.000000), SIMDE_FLOAT16_VALUE(4476.000000) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), 
SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-5340.000000), SIMDE_FLOAT16_VALUE(-5256.000000), SIMDE_FLOAT16_VALUE(10224.000000), SIMDE_FLOAT16_VALUE(9984.000000) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(26144.000000), SIMDE_FLOAT16_VALUE(25920.000000), SIMDE_FLOAT16_VALUE(-23680.000000), SIMDE_FLOAT16_VALUE(-25360.000000) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(21776.000000), SIMDE_FLOAT16_VALUE(20304.000000), SIMDE_FLOAT16_VALUE(-45568.000000), SIMDE_FLOAT16_VALUE(-45248.000000) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + 
SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(48256.000000), SIMDE_FLOAT16_VALUE(48544.000000), SIMDE_FLOAT16_VALUE(32704.000000), SIMDE_FLOAT16_VALUE(32352.000000) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(19840.000000), SIMDE_FLOAT16_VALUE(20176.000000), SIMDE_FLOAT16_VALUE(26880.000000), SIMDE_FLOAT16_VALUE(28592.000000) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(56384.000000), SIMDE_FLOAT16_VALUE(56096.000000), SIMDE_FLOAT16_VALUE(-16768.000000), SIMDE_FLOAT16_VALUE(-18048.000000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_laneq_f16, r, - (HEDLEY_UNREACHABLE(), 
simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -335,7 +260,8 @@ static int test_simde_vcmla_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -344,77 +270,67 @@ static int test_simde_vcmla_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, - {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, - {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), - SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-44964.726562), SIMDE_FLOAT32_C(-44412.597656)}}, - {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, - {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, - {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), - SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-6814.092285), 
SIMDE_FLOAT32_C(-7088.232422)}}, - {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, - {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, - {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), - SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, - INT32_C(0), - {SIMDE_FLOAT32_C(119040.617188), SIMDE_FLOAT32_C(119702.507812)}}, - {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, - {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, - {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), - SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-18774.140625), SIMDE_FLOAT32_C(-19240.259766)}}, - {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, - {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, - {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), - SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-75683.437500), SIMDE_FLOAT32_C(-75956.437500)}}, - {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, - {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, - {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), - SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-46967.093750), SIMDE_FLOAT32_C(-46950.054688)}}, - {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, - {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, - {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), - SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-145833.875000), SIMDE_FLOAT32_C(-145761.453125)}}, - {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, - {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, - {SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), - SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, - INT32_C(1), - {SIMDE_FLOAT32_C(8569.627930), SIMDE_FLOAT32_C(8744.038086)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), 
SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-44964.726562), SIMDE_FLOAT32_C(-44412.597656) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-6814.092285), SIMDE_FLOAT32_C(-7088.232422) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(119040.617188), SIMDE_FLOAT32_C(119702.507812) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-18774.140625), SIMDE_FLOAT32_C(-19240.259766) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-75683.437500), SIMDE_FLOAT32_C(-75956.437500) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-46967.093750), SIMDE_FLOAT32_C(-46950.054688) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), 
SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-145833.875000), SIMDE_FLOAT32_C(-145761.453125) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(8569.627930), SIMDE_FLOAT32_C(8744.038086) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -425,7 +341,8 @@ static int test_simde_vcmla_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { 
#if 1 struct { simde_float16_t r_[8]; @@ -435,154 +352,95 @@ static int test_simde_vcmlaq_lane_f16(SIMDE_MUNIT_TEST_ARGS) { simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), - SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), - SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, - {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), - SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), - SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), - SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, - {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), - SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-43648.000000), SIMDE_FLOAT16_VALUE(-43712.000000), - SIMDE_FLOAT16_VALUE(30640.000000), SIMDE_FLOAT16_VALUE(30880.000000), - SIMDE_FLOAT16_VALUE(-11448.000000), SIMDE_FLOAT16_VALUE(-10904.000000), - SIMDE_FLOAT16_VALUE(26688.000000), SIMDE_FLOAT16_VALUE(27424.000000)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), - SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, - {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-640.00), - SIMDE_FLOAT16_VALUE(-552.00), SIMDE_FLOAT16_VALUE(75.88), - SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), - SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(943.50)}, - {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), - SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(3430.000000), SIMDE_FLOAT16_VALUE(3588.000000), - SIMDE_FLOAT16_VALUE(-48928.000000), SIMDE_FLOAT16_VALUE(-48800.000000), - SIMDE_FLOAT16_VALUE(30720.000000), SIMDE_FLOAT16_VALUE(30528.000000), - SIMDE_FLOAT16_VALUE(42848.000000), 
SIMDE_FLOAT16_VALUE(43776.000000)}}, - {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), - SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), - SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), - SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, - {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), - SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), - SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), - SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(16480.000000), SIMDE_FLOAT16_VALUE(17296.000000), - SIMDE_FLOAT16_VALUE(18480.000000), SIMDE_FLOAT16_VALUE(18000.000000), - SIMDE_FLOAT16_VALUE(-17888.000000), SIMDE_FLOAT16_VALUE(-18064.000000), - SIMDE_FLOAT16_VALUE(-24672.000000), - SIMDE_FLOAT16_VALUE(-23072.000000)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), - SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), - SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), - SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, - {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), - SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), - SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), - SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, - {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), - SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(21504.000000), SIMDE_FLOAT16_VALUE(22064.000000), - SIMDE_FLOAT16_VALUE(23696.000000), SIMDE_FLOAT16_VALUE(25104.000000), - SIMDE_FLOAT16_VALUE(-8448.000000), SIMDE_FLOAT16_VALUE(-8480.000000), - SIMDE_FLOAT16_VALUE(3242.000000), SIMDE_FLOAT16_VALUE(3230.000000)}}, - {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), - SIMDE_FLOAT16_VALUE(-888.00), 
SIMDE_FLOAT16_VALUE(-283.75), - SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), - SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, - {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), - SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), - SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), - SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, - {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), - SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-7092.000000), SIMDE_FLOAT16_VALUE(-6896.000000), - SIMDE_FLOAT16_VALUE(8960.000000), SIMDE_FLOAT16_VALUE(9568.000000), - SIMDE_FLOAT16_VALUE(6324.000000), SIMDE_FLOAT16_VALUE(5600.000000), - SIMDE_FLOAT16_VALUE(7520.000000), SIMDE_FLOAT16_VALUE(5868.000000)}}, - {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), - SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), - SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), - SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, - {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), - SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), - SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), - SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, - {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), - SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(770.000000), SIMDE_FLOAT16_VALUE(1455.000000), - SIMDE_FLOAT16_VALUE(8176.000000), SIMDE_FLOAT16_VALUE(8584.000000), - SIMDE_FLOAT16_VALUE(-2040.000000), SIMDE_FLOAT16_VALUE(-1875.000000), - SIMDE_FLOAT16_VALUE(-3288.000000), SIMDE_FLOAT16_VALUE(-3358.000000)}}, - {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), - SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), - SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), - SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, - 
{SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), - SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, - {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), - SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-25472.000000), SIMDE_FLOAT16_VALUE(-26448.000000), - SIMDE_FLOAT16_VALUE(-36736.000000), SIMDE_FLOAT16_VALUE(-38048.000000), - SIMDE_FLOAT16_VALUE(-6944.000000), SIMDE_FLOAT16_VALUE(-7372.000000), - SIMDE_FLOAT16_VALUE(35744.000000), SIMDE_FLOAT16_VALUE(34176.000000)}}, - {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), - SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), - SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), - SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, - {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), - SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), - SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), - SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, - {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), - SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-14208.000000), SIMDE_FLOAT16_VALUE(-13840.000000), - SIMDE_FLOAT16_VALUE(-13016.000000), SIMDE_FLOAT16_VALUE(-12560.000000), - SIMDE_FLOAT16_VALUE(-7736.000000), SIMDE_FLOAT16_VALUE(-7184.000000), - SIMDE_FLOAT16_VALUE(-6408.000000), SIMDE_FLOAT16_VALUE(-7472.000000)}} - + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + 
SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-43648.000000), SIMDE_FLOAT16_VALUE(-43712.000000), SIMDE_FLOAT16_VALUE(30640.000000), SIMDE_FLOAT16_VALUE(30880.000000), + SIMDE_FLOAT16_VALUE(-11448.000000), SIMDE_FLOAT16_VALUE(-10904.000000), SIMDE_FLOAT16_VALUE(26688.000000), SIMDE_FLOAT16_VALUE(27424.000000) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -640.00), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 943.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(3430.000000), SIMDE_FLOAT16_VALUE(3588.000000), SIMDE_FLOAT16_VALUE(-48928.000000), SIMDE_FLOAT16_VALUE(-48800.000000), + SIMDE_FLOAT16_VALUE(30720.000000), SIMDE_FLOAT16_VALUE(30528.000000), SIMDE_FLOAT16_VALUE(42848.000000), SIMDE_FLOAT16_VALUE(43776.000000) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) 
}, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(16480.000000), SIMDE_FLOAT16_VALUE(17296.000000), SIMDE_FLOAT16_VALUE(18480.000000), SIMDE_FLOAT16_VALUE(18000.000000), + SIMDE_FLOAT16_VALUE(-17888.000000), SIMDE_FLOAT16_VALUE(-18064.000000), SIMDE_FLOAT16_VALUE(-24672.000000), SIMDE_FLOAT16_VALUE(-23072.000000) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(21504.000000), SIMDE_FLOAT16_VALUE(22064.000000), SIMDE_FLOAT16_VALUE(23696.000000), SIMDE_FLOAT16_VALUE(25104.000000), + SIMDE_FLOAT16_VALUE(-8448.000000), SIMDE_FLOAT16_VALUE(-8480.000000), SIMDE_FLOAT16_VALUE(3242.000000), SIMDE_FLOAT16_VALUE(3230.000000) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( 
-617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-7092.000000), SIMDE_FLOAT16_VALUE(-6896.000000), SIMDE_FLOAT16_VALUE(8960.000000), SIMDE_FLOAT16_VALUE(9568.000000), + SIMDE_FLOAT16_VALUE(6324.000000), SIMDE_FLOAT16_VALUE(5600.000000), SIMDE_FLOAT16_VALUE(7520.000000), SIMDE_FLOAT16_VALUE(5868.000000) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(770.000000), SIMDE_FLOAT16_VALUE(1455.000000), SIMDE_FLOAT16_VALUE(8176.000000), SIMDE_FLOAT16_VALUE(8584.000000), + SIMDE_FLOAT16_VALUE(-2040.000000), SIMDE_FLOAT16_VALUE(-1875.000000), SIMDE_FLOAT16_VALUE(-3288.000000), SIMDE_FLOAT16_VALUE(-3358.000000) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-25472.000000), SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-36736.000000), 
SIMDE_FLOAT16_VALUE(-38048.000000), + SIMDE_FLOAT16_VALUE(-6944.000000), SIMDE_FLOAT16_VALUE(-7372.000000), SIMDE_FLOAT16_VALUE(35744.000000), SIMDE_FLOAT16_VALUE(34176.000000) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-14208.000000), SIMDE_FLOAT16_VALUE(-13840.000000), SIMDE_FLOAT16_VALUE(-13016.000000), SIMDE_FLOAT16_VALUE(-12560.000000), + SIMDE_FLOAT16_VALUE(-7736.000000), SIMDE_FLOAT16_VALUE(-7184.000000), SIMDE_FLOAT16_VALUE(-6408.000000), SIMDE_FLOAT16_VALUE(-7472.000000) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); - - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + SIMDE_CONSTIFY_2_(simde_vcmlaq_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 
1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -593,7 +451,8 @@ static int test_simde_vcmlaq_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -602,90 +461,66 @@ static int test_simde_vcmlaq_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), - SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, - {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), - SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, - {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-249748.578125), SIMDE_FLOAT32_C(-249559.515625), - SIMDE_FLOAT32_C(640137.687500), SIMDE_FLOAT32_C(640144.187500)}}, - {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), - SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, - {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), - SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, - {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-78323.289062), SIMDE_FLOAT32_C(-78133.671875), - SIMDE_FLOAT32_C(-107301.625000), SIMDE_FLOAT32_C(-108250.398438)}}, - {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), - SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, - 
{SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), - SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, - {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-254237.640625), SIMDE_FLOAT32_C(-254599.218750), - SIMDE_FLOAT32_C(541767.562500), SIMDE_FLOAT32_C(541224.875000)}}, - {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), - SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, - {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), - SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, - {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-66048.968750), SIMDE_FLOAT32_C(-65950.062500), - SIMDE_FLOAT32_C(93407.554688), SIMDE_FLOAT32_C(92652.742188)}}, - {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), - SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, - {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), - SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, - {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, - INT32_C(0), - {SIMDE_FLOAT32_C(16991.468750), SIMDE_FLOAT32_C(15174.667969), - SIMDE_FLOAT32_C(10405.092773), SIMDE_FLOAT32_C(10204.472656)}}, - {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), - SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, - {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), - SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, - {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, - INT32_C(0), - {SIMDE_FLOAT32_C(93642.765625), SIMDE_FLOAT32_C(94834.125000), - SIMDE_FLOAT32_C(-242623.015625), SIMDE_FLOAT32_C(-242803.562500)}}, - {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), - SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, - {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), - SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, - {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, - INT32_C(0), - {SIMDE_FLOAT32_C(138038.406250), SIMDE_FLOAT32_C(139079.562500), - SIMDE_FLOAT32_C(-219419.500000), SIMDE_FLOAT32_C(-218382.046875)}}, - 
{{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), - SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, - {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), - SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, - {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-139701.843750), SIMDE_FLOAT32_C(-138290.437500), - SIMDE_FLOAT32_C(-32857.097656), SIMDE_FLOAT32_C(-31991.136719)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-249748.578125), SIMDE_FLOAT32_C(-249559.515625), SIMDE_FLOAT32_C(640137.687500), SIMDE_FLOAT32_C(640144.187500) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-78323.289062), SIMDE_FLOAT32_C(-78133.671875), SIMDE_FLOAT32_C(-107301.625000), SIMDE_FLOAT32_C(-108250.398438) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-254237.640625), SIMDE_FLOAT32_C(-254599.218750), SIMDE_FLOAT32_C(541767.562500), SIMDE_FLOAT32_C(541224.875000) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 
514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-66048.968750), SIMDE_FLOAT32_C(-65950.062500), SIMDE_FLOAT32_C(93407.554688), SIMDE_FLOAT32_C(92652.742188) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(16991.468750), SIMDE_FLOAT32_C(15174.667969), SIMDE_FLOAT32_C(10405.092773), SIMDE_FLOAT32_C(10204.472656) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(93642.765625), SIMDE_FLOAT32_C(94834.125000), SIMDE_FLOAT32_C(-242623.015625), SIMDE_FLOAT32_C(-242803.562500) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(138038.406250), SIMDE_FLOAT32_C(139079.562500), SIMDE_FLOAT32_C(-219419.500000), SIMDE_FLOAT32_C(-218382.046875) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-139701.843750), SIMDE_FLOAT32_C(-138290.437500), SIMDE_FLOAT32_C(-32857.097656), SIMDE_FLOAT32_C(-31991.136719) } } + }; + + for (size_t 
i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -696,7 +531,8 @@ static int test_simde_vcmlaq_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -705,170 +541,99 @@ static int test_simde_vcmlaq_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), - SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), - SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), - SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, - {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), - SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), - SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), - 
SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, - {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), - SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), - SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), - SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(58400.000000), SIMDE_FLOAT16_VALUE(59040.000000), - SIMDE_FLOAT16_VALUE(5488.000000), SIMDE_FLOAT16_VALUE(4376.000000), - SIMDE_FLOAT16_VALUE(7296.000000), SIMDE_FLOAT16_VALUE(8016.000000), - SIMDE_FLOAT16_VALUE(-22048.000000), - SIMDE_FLOAT16_VALUE(-21008.000000)}}, - {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), - SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), - SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), - SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, - {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), - SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(784.00), - SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-896.00)}, - {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), - SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), - SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), - SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-11536.000000), SIMDE_FLOAT16_VALUE(-11520.000000), - SIMDE_FLOAT16_VALUE(-16336.000000), SIMDE_FLOAT16_VALUE(-17824.000000), - SIMDE_FLOAT16_VALUE(52640.000000), SIMDE_FLOAT16_VALUE(53216.000000), - SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(38336.000000)}}, - {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), - SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), - SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), - SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, - {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), - SIMDE_FLOAT16_VALUE(904.50), 
SIMDE_FLOAT16_VALUE(-290.50), - SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), - SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, - {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), - SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), - SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), - SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(-16224.000000), SIMDE_FLOAT16_VALUE(-15608.000000), - SIMDE_FLOAT16_VALUE(29552.000000), SIMDE_FLOAT16_VALUE(29264.000000), - SIMDE_FLOAT16_VALUE(-26304.000000), SIMDE_FLOAT16_VALUE(-25360.000000), - SIMDE_FLOAT16_VALUE(7980.000000), SIMDE_FLOAT16_VALUE(6856.000000)}}, - {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), - SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), - SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), - SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, - {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), - SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), - SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), - SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, - {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), - SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), - SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), - SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(29904.000000), SIMDE_FLOAT16_VALUE(30496.000000), - SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-26512.000000), - SIMDE_FLOAT16_VALUE(-12880.000000), SIMDE_FLOAT16_VALUE(-11296.000000), - SIMDE_FLOAT16_VALUE(19456.000000), SIMDE_FLOAT16_VALUE(18704.000000)}}, - {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), - SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), - SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), - SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, - 
{SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), - SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), - SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), - SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, - {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), - SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), - SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), - SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(2372.000000), SIMDE_FLOAT16_VALUE(1167.000000), - SIMDE_FLOAT16_VALUE(18384.000000), SIMDE_FLOAT16_VALUE(17392.000000), - SIMDE_FLOAT16_VALUE(4904.000000), SIMDE_FLOAT16_VALUE(3678.000000), - SIMDE_FLOAT16_VALUE(-13288.000000), - SIMDE_FLOAT16_VALUE(-13520.000000)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), - SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), - SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), - SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, - {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), - SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), - SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), - SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, - {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), - SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), - SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), - SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-7536.000000), SIMDE_FLOAT16_VALUE(-6752.000000), - SIMDE_FLOAT16_VALUE(11696.000000), SIMDE_FLOAT16_VALUE(10864.000000), - SIMDE_FLOAT16_VALUE(-734.500000), SIMDE_FLOAT16_VALUE(599.500000), - SIMDE_FLOAT16_VALUE(-12600.000000), - SIMDE_FLOAT16_VALUE(-12016.000000)}}, - {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), - SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), - SIMDE_FLOAT16_VALUE(143.12), 
SIMDE_FLOAT16_VALUE(698.50), - SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, - {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), - SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), - SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, - {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), - SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), - SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), - SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(-21712.000000), SIMDE_FLOAT16_VALUE(-21360.000000), - SIMDE_FLOAT16_VALUE(8288.000000), SIMDE_FLOAT16_VALUE(8456.000000), - SIMDE_FLOAT16_VALUE(-8084.000000), SIMDE_FLOAT16_VALUE(-7528.000000), - SIMDE_FLOAT16_VALUE(19152.000000), SIMDE_FLOAT16_VALUE(18944.000000)}}, - {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), - SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), - SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, - {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), - SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), - SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), - SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, - {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), - SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), - SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), - SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(-13672.000000), SIMDE_FLOAT16_VALUE(-13552.000000), - SIMDE_FLOAT16_VALUE(-18768.000000), SIMDE_FLOAT16_VALUE(-19184.000000), - SIMDE_FLOAT16_VALUE(39072.000000), SIMDE_FLOAT16_VALUE(38272.000000), - SIMDE_FLOAT16_VALUE(-23568.000000), - SIMDE_FLOAT16_VALUE(-22672.000000)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); 
i++) { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(58400.000000), SIMDE_FLOAT16_VALUE(59040.000000), SIMDE_FLOAT16_VALUE(5488.000000), SIMDE_FLOAT16_VALUE(4376.000000), + SIMDE_FLOAT16_VALUE(7296.000000), SIMDE_FLOAT16_VALUE(8016.000000), SIMDE_FLOAT16_VALUE(-22048.000000), SIMDE_FLOAT16_VALUE(-21008.000000) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 784.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -896.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-11536.000000), SIMDE_FLOAT16_VALUE(-11520.000000), SIMDE_FLOAT16_VALUE(-16336.000000), SIMDE_FLOAT16_VALUE(-17824.000000), + 
SIMDE_FLOAT16_VALUE(52640.000000), SIMDE_FLOAT16_VALUE(53216.000000), SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(38336.000000) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE(-16224.000000), SIMDE_FLOAT16_VALUE(-15608.000000), SIMDE_FLOAT16_VALUE(29552.000000), SIMDE_FLOAT16_VALUE(29264.000000), + SIMDE_FLOAT16_VALUE(-26304.000000), SIMDE_FLOAT16_VALUE(-25360.000000), SIMDE_FLOAT16_VALUE(7980.000000), SIMDE_FLOAT16_VALUE(6856.000000) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { 
SIMDE_FLOAT16_VALUE(29904.000000), SIMDE_FLOAT16_VALUE(30496.000000), SIMDE_FLOAT16_VALUE(-26448.000000), SIMDE_FLOAT16_VALUE(-26512.000000), + SIMDE_FLOAT16_VALUE(-12880.000000), SIMDE_FLOAT16_VALUE(-11296.000000), SIMDE_FLOAT16_VALUE(19456.000000), SIMDE_FLOAT16_VALUE(18704.000000) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(2372.000000), SIMDE_FLOAT16_VALUE(1167.000000), SIMDE_FLOAT16_VALUE(18384.000000), SIMDE_FLOAT16_VALUE(17392.000000), + SIMDE_FLOAT16_VALUE(4904.000000), SIMDE_FLOAT16_VALUE(3678.000000), SIMDE_FLOAT16_VALUE(-13288.000000), SIMDE_FLOAT16_VALUE(-13520.000000) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + 
SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-7536.000000), SIMDE_FLOAT16_VALUE(-6752.000000), SIMDE_FLOAT16_VALUE(11696.000000), SIMDE_FLOAT16_VALUE(10864.000000), + SIMDE_FLOAT16_VALUE(-734.500000), SIMDE_FLOAT16_VALUE(599.500000), SIMDE_FLOAT16_VALUE(-12600.000000), SIMDE_FLOAT16_VALUE(-12016.000000) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE(-21712.000000), SIMDE_FLOAT16_VALUE(-21360.000000), SIMDE_FLOAT16_VALUE(8288.000000), SIMDE_FLOAT16_VALUE(8456.000000), + SIMDE_FLOAT16_VALUE(-8084.000000), SIMDE_FLOAT16_VALUE(-7528.000000), SIMDE_FLOAT16_VALUE(19152.000000), SIMDE_FLOAT16_VALUE(18944.000000) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { 
SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-13672.000000), SIMDE_FLOAT16_VALUE(-13552.000000), SIMDE_FLOAT16_VALUE(-18768.000000), SIMDE_FLOAT16_VALUE(-19184.000000), + SIMDE_FLOAT16_VALUE(39072.000000), SIMDE_FLOAT16_VALUE(38272.000000), SIMDE_FLOAT16_VALUE(-23568.000000), SIMDE_FLOAT16_VALUE(-22672.000000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_4_( - simde_vcmlaq_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + SIMDE_CONSTIFY_4_(simde_vcmlaq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -879,7 +644,8 @@ 
static int test_simde_vcmlaq_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -888,102 +654,67 @@ static int test_simde_vcmlaq_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), - SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, - {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), - SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, - {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), - SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(431843.781250), SIMDE_FLOAT32_C(431658.250000), - SIMDE_FLOAT32_C(239604.218750), SIMDE_FLOAT32_C(239020.156250)}}, - {{SIMDE_FLOAT32_C(-57.67), SIMDE_FLOAT32_C(-897.07), - SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, - {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), - SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, - {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), - SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-185751.734375), SIMDE_FLOAT32_C(-186591.140625), - SIMDE_FLOAT32_C(-1273.252075), SIMDE_FLOAT32_C(-1780.152100)}}, - {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), - SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, - {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), - SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, - {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), - SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-242415.500000), SIMDE_FLOAT32_C(-243155.093750), - SIMDE_FLOAT32_C(189533.046875), SIMDE_FLOAT32_C(189217.609375)}}, - {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), - SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, - {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), - 
SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, - {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), - SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-485871.250000), SIMDE_FLOAT32_C(-487381.343750), - SIMDE_FLOAT32_C(-588011.437500), SIMDE_FLOAT32_C(-587360.687500)}}, - {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), - SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, - {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), - SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, - {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), - SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-326445.437500), SIMDE_FLOAT32_C(-326905.343750), - SIMDE_FLOAT32_C(163765.484375), SIMDE_FLOAT32_C(163522.125000)}}, - {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), - SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, - {SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), - SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, - {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), - SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, - INT32_C(1), - {SIMDE_FLOAT32_C(711101.312500), SIMDE_FLOAT32_C(711655.625000), - SIMDE_FLOAT32_C(487798.281250), SIMDE_FLOAT32_C(487838.125000)}}, - {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), - SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, - {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), - SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, - {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), - SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(792643.500000), SIMDE_FLOAT32_C(791384.875000), - SIMDE_FLOAT32_C(-192679.765625), SIMDE_FLOAT32_C(-192855.687500)}}, - {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), - SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, - {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), - SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, - {SIMDE_FLOAT32_C(308.52), 
SIMDE_FLOAT32_C(-189.06), - SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-153862.109375), SIMDE_FLOAT32_C(-152944.718750), - SIMDE_FLOAT32_C(-160066.187500), SIMDE_FLOAT32_C(-158853.750000)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(431843.781250), SIMDE_FLOAT32_C(431658.250000), SIMDE_FLOAT32_C(239604.218750), SIMDE_FLOAT32_C(239020.156250) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-185751.734375), SIMDE_FLOAT32_C(-186591.140625), SIMDE_FLOAT32_C(-1273.252075), SIMDE_FLOAT32_C(-1780.152100) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-242415.500000), SIMDE_FLOAT32_C(-243155.093750), SIMDE_FLOAT32_C(189533.046875), SIMDE_FLOAT32_C(189217.609375) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) 
}, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-485871.250000), SIMDE_FLOAT32_C(-487381.343750), SIMDE_FLOAT32_C(-588011.437500), SIMDE_FLOAT32_C(-587360.687500) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-326445.437500), SIMDE_FLOAT32_C(-326905.343750), SIMDE_FLOAT32_C(163765.484375), SIMDE_FLOAT32_C(163522.125000) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(711101.312500), SIMDE_FLOAT32_C(711655.625000), SIMDE_FLOAT32_C(487798.281250), SIMDE_FLOAT32_C(487838.125000) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(792643.500000), SIMDE_FLOAT32_C(791384.875000), SIMDE_FLOAT32_C(-192679.765625), SIMDE_FLOAT32_C(-192855.687500) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), 
SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-153862.109375), SIMDE_FLOAT32_C(-152944.718750), SIMDE_FLOAT32_C(-160066.187500), SIMDE_FLOAT32_C(-158853.750000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + SIMDE_CONSTIFY_2_(simde_vcmlaq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); diff --git a/test/arm/neon/cmla_rot180_lane.c b/test/arm/neon/cmla_rot180_lane.c index fcf3b86eb..b0539b028 100644 --- a/test/arm/neon/cmla_rot180_lane.c +++ b/test/arm/neon/cmla_rot180_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_rot180_lane +#include "test-neon.h" #include "../../../simde/arm/neon/cmla_rot180_lane.h" - 
#include "../../../simde/arm/neon/dup_n.h" -#include "test-neon.h" -static int test_simde_vcmla_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -14,104 +14,68 @@ static int test_simde_vcmla_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), - SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, - {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), - SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(7624.00), SIMDE_FLOAT16_VALUE(7564.00), - SIMDE_FLOAT16_VALUE(9160.00), SIMDE_FLOAT16_VALUE(9224.00)}}, - {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-14.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(61.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-17248.00), SIMDE_FLOAT16_VALUE(-17872.00), - SIMDE_FLOAT16_VALUE(8064.00), SIMDE_FLOAT16_VALUE(9344.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-61.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(8448.00), SIMDE_FLOAT16_VALUE(8232.00), - SIMDE_FLOAT16_VALUE(-7296.00), SIMDE_FLOAT16_VALUE(-8968.00)}}, - {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), - 
SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, - {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), - SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), - SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(37728.00), SIMDE_FLOAT16_VALUE(37440.00), - SIMDE_FLOAT16_VALUE(-23856.00), SIMDE_FLOAT16_VALUE(-23904.00)}}, - {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), - SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, - {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), - SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, - {SIMDE_FLOAT16_VALUE(-80.50), SIMDE_FLOAT16_VALUE(375.50), - SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(15600.00), SIMDE_FLOAT16_VALUE(15432.00), - SIMDE_FLOAT16_VALUE(22880.00), SIMDE_FLOAT16_VALUE(22992.00)}}, - {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, - {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, - {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), - SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-305.50), SIMDE_FLOAT16_VALUE(-349.50), - SIMDE_FLOAT16_VALUE(-48992.00), SIMDE_FLOAT16_VALUE(-48672.00)}}, - {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, - {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(35936.00), SIMDE_FLOAT16_VALUE(35488.00), - SIMDE_FLOAT16_VALUE(10008.00), SIMDE_FLOAT16_VALUE(9968.00)}}, - 
{{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, - {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, - {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), - SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(16880.00), SIMDE_FLOAT16_VALUE(17040.00), - SIMDE_FLOAT16_VALUE(14712.00), SIMDE_FLOAT16_VALUE(14832.00)}} - + { + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 7624.00), SIMDE_FLOAT16_VALUE( 7564.00), SIMDE_FLOAT16_VALUE( 9160.00), SIMDE_FLOAT16_VALUE( 9224.00) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-17248.00), SIMDE_FLOAT16_VALUE(-17872.00), SIMDE_FLOAT16_VALUE( 8064.00), SIMDE_FLOAT16_VALUE( 9344.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 
8448.00), SIMDE_FLOAT16_VALUE( 8232.00), SIMDE_FLOAT16_VALUE( -7296.00), SIMDE_FLOAT16_VALUE( -8968.00) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 37728.00), SIMDE_FLOAT16_VALUE( 37440.00), SIMDE_FLOAT16_VALUE(-23856.00), SIMDE_FLOAT16_VALUE(-23904.00) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 15600.00), SIMDE_FLOAT16_VALUE( 15432.00), SIMDE_FLOAT16_VALUE( 22880.00), SIMDE_FLOAT16_VALUE( 22992.00) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -305.50), SIMDE_FLOAT16_VALUE( -349.50), SIMDE_FLOAT16_VALUE(-48992.00), SIMDE_FLOAT16_VALUE(-48672.00) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), 
SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 35936.00), SIMDE_FLOAT16_VALUE( 35488.00), SIMDE_FLOAT16_VALUE( 10008.00), SIMDE_FLOAT16_VALUE( 9968.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 16880.00), SIMDE_FLOAT16_VALUE( 17040.00), SIMDE_FLOAT16_VALUE( 14712.00), SIMDE_FLOAT16_VALUE( 14832.00) } } }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot180_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot180_lane_f16(r_, a, b, lanes[i]); + 
simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -122,7 +86,8 @@ static int test_simde_vcmla_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -131,65 +96,66 @@ static int test_simde_vcmla_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, - {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, - {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-554711.687500), SIMDE_FLOAT32_C(-554377.000000)}}, - {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, - {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, - {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, - INT32_C(0), - {SIMDE_FLOAT32_C(5756.336914), SIMDE_FLOAT32_C(7296.427246)}}, - {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, - {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, - {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-266318.062500), SIMDE_FLOAT32_C(-267662.781250)}}, - {{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, - {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, - {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, - INT32_C(0), - {SIMDE_FLOAT32_C(170191.187500), SIMDE_FLOAT32_C(168918.281250)}}, - {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, - {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, - {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-22939.761719), SIMDE_FLOAT32_C(-23948.582031)}}, - {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, - {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, - 
{SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-41678.914062), SIMDE_FLOAT32_C(-40308.664062)}}, - {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, - {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, - {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, - INT32_C(0), - {SIMDE_FLOAT32_C(345016.781250), SIMDE_FLOAT32_C(345564.468750)}}, - {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, - {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, - {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, - INT32_C(0), - {SIMDE_FLOAT32_C(119264.132812), SIMDE_FLOAT32_C(120072.914062)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-554711.687500), SIMDE_FLOAT32_C(-554377.000000) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(5756.336914), SIMDE_FLOAT32_C(7296.427246) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-266318.062500), SIMDE_FLOAT32_C(-267662.781250) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(170191.187500), SIMDE_FLOAT32_C(168918.281250) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-22939.761719), 
SIMDE_FLOAT32_C(-23948.582031) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-41678.914062), SIMDE_FLOAT32_C(-40308.664062) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(345016.781250), SIMDE_FLOAT32_C(345564.468750) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(119264.132812), SIMDE_FLOAT32_C(120072.914062) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot180_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -200,7 +166,9 @@ static int 
test_simde_vcmla_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { + +static int +test_simde_vcmla_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -209,119 +177,75 @@ static int test_simde_vcmla_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), - SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, - {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), - SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), - SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), - SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(8896.00), SIMDE_FLOAT16_VALUE(8856.00), - SIMDE_FLOAT16_VALUE(-5340.00), SIMDE_FLOAT16_VALUE(-4416.00)}}, - {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), - SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), - SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), - SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(498.75), SIMDE_FLOAT16_VALUE(91.62), - SIMDE_FLOAT16_VALUE(-3892.00), SIMDE_FLOAT16_VALUE(-3032.00)}}, - {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), - SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, - {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), - SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, - {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), - 
SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), - SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), - SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(4668.00), SIMDE_FLOAT16_VALUE(4752.00), - SIMDE_FLOAT16_VALUE(-9768.00), SIMDE_FLOAT16_VALUE(-10000.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-44.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-66.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(85.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), - SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-12368.00), SIMDE_FLOAT16_VALUE(-12592.00), - SIMDE_FLOAT16_VALUE(11288.00), SIMDE_FLOAT16_VALUE(11216.00)}}, - {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), - SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, - {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), - SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), - SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), - SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-20688.00), SIMDE_FLOAT16_VALUE(-22160.00), - SIMDE_FLOAT16_VALUE(45600.00), SIMDE_FLOAT16_VALUE(45920.00)}}, - {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), - SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, - {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), - SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, - {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), - SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), - 
SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), - SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(-47488.00), - SIMDE_FLOAT16_VALUE(-32832.00), SIMDE_FLOAT16_VALUE(-33184.00)}}, - {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), - SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, - {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), - SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, - {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), - SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), - SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), - SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(-19344.00), - SIMDE_FLOAT16_VALUE(-28640.00), SIMDE_FLOAT16_VALUE(-26928.00)}}, - {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), - SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, - {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), - SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, - {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), - SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), - SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-55072.00), SIMDE_FLOAT16_VALUE(-55328.00), - SIMDE_FLOAT16_VALUE(17728.00), SIMDE_FLOAT16_VALUE(16464.00)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( 
-757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 8896.00), SIMDE_FLOAT16_VALUE( 8856.00), SIMDE_FLOAT16_VALUE( -5340.00), SIMDE_FLOAT16_VALUE( -4416.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 498.75), SIMDE_FLOAT16_VALUE( 91.62), SIMDE_FLOAT16_VALUE( -3892.00), SIMDE_FLOAT16_VALUE( -3032.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 4668.00), SIMDE_FLOAT16_VALUE( 4752.00), SIMDE_FLOAT16_VALUE( -9768.00), SIMDE_FLOAT16_VALUE(-10000.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( 
-975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-12368.00), SIMDE_FLOAT16_VALUE(-12592.00), SIMDE_FLOAT16_VALUE( 11288.00), SIMDE_FLOAT16_VALUE( 11216.00) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-20688.00), SIMDE_FLOAT16_VALUE(-22160.00), SIMDE_FLOAT16_VALUE( 45600.00), SIMDE_FLOAT16_VALUE( 45920.00) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(-47488.00), SIMDE_FLOAT16_VALUE(-32832.00), SIMDE_FLOAT16_VALUE(-33184.00) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), 
SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(-19344.00), SIMDE_FLOAT16_VALUE(-28640.00), SIMDE_FLOAT16_VALUE(-26928.00) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-55072.00), SIMDE_FLOAT16_VALUE(-55328.00), SIMDE_FLOAT16_VALUE( 17728.00), SIMDE_FLOAT16_VALUE( 16464.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - - simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot180_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); 
simde_float16x4_t r = simde_vcmla_rot180_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -332,7 +256,8 @@ static int test_simde_vcmla_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -341,78 +266,68 @@ static int test_simde_vcmla_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, - {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, - {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), - SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, - INT32_C(0), - {SIMDE_FLOAT32_C(44024.207031), SIMDE_FLOAT32_C(44576.335938)}}, - {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, - {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, - {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), - SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, - INT32_C(1), - {SIMDE_FLOAT32_C(6150.492188), SIMDE_FLOAT32_C(5876.352051)}}, - {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, - {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, - {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), - SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-119101.335938), SIMDE_FLOAT32_C(-118439.445312)}}, - {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, - {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, - {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), - SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, - INT32_C(1), - {SIMDE_FLOAT32_C(20765.861328), SIMDE_FLOAT32_C(20299.740234)}}, - {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, - {SIMDE_FLOAT32_C(99.79), 
SIMDE_FLOAT32_C(-137.67)}, - {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), - SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, - INT32_C(0), - {SIMDE_FLOAT32_C(76234.859375), SIMDE_FLOAT32_C(75961.859375)}}, - {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, - {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, - {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), - SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, - INT32_C(1), - {SIMDE_FLOAT32_C(47759.132812), SIMDE_FLOAT32_C(47776.171875)}}, - {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, - {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, - {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), - SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, - INT32_C(0), - {SIMDE_FLOAT32_C(145529.937500), SIMDE_FLOAT32_C(145602.359375)}}, - {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, - {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, - {SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), - SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-8361.368164), SIMDE_FLOAT32_C(-8186.958496)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(44024.207031), SIMDE_FLOAT32_C(44576.335938) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(6150.492188), SIMDE_FLOAT32_C(5876.352051) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), 
SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-119101.335938), SIMDE_FLOAT32_C(-118439.445312) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(20765.861328), SIMDE_FLOAT32_C(20299.740234) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(76234.859375), SIMDE_FLOAT32_C(75961.859375) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(47759.132812), SIMDE_FLOAT32_C(47776.171875) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(145529.937500), SIMDE_FLOAT32_C(145602.359375) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-8361.368164), SIMDE_FLOAT32_C(-8186.958496) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - 
SIMDE_CONSTIFY_2_( - simde_vcmla_rot180_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot180_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot180_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -423,7 +338,8 @@ static int test_simde_vcmla_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot180_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -432,154 +348,92 @@ static int test_simde_vcmlaq_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - - {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), - SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), - SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, - {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), - SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), - SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), - SIMDE_FLOAT16_VALUE(-549.50), 
SIMDE_FLOAT16_VALUE(-460.00)}, - {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), - SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(44096.00), SIMDE_FLOAT16_VALUE(44064.00), - SIMDE_FLOAT16_VALUE(-30272.00), SIMDE_FLOAT16_VALUE(-30032.00), - SIMDE_FLOAT16_VALUE(10048.00), SIMDE_FLOAT16_VALUE(10600.00), - SIMDE_FLOAT16_VALUE(-27472.00), SIMDE_FLOAT16_VALUE(-26736.00)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), - SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, - {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-40.00), - SIMDE_FLOAT16_VALUE(-52.00), SIMDE_FLOAT16_VALUE(75.88), - SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), - SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(43.50)}, - {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), - SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-5168.00), SIMDE_FLOAT16_VALUE(-5008.00), - SIMDE_FLOAT16_VALUE(5108.00), SIMDE_FLOAT16_VALUE(5228.00), - SIMDE_FLOAT16_VALUE(-32288.00), SIMDE_FLOAT16_VALUE(-32480.00), - SIMDE_FLOAT16_VALUE(-44000.00), SIMDE_FLOAT16_VALUE(-43040.00)}}, - {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), - SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), - SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), - SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, - {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), - SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), - SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), - SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-18432.00), 
SIMDE_FLOAT16_VALUE(-17616.00), - SIMDE_FLOAT16_VALUE(-19232.00), SIMDE_FLOAT16_VALUE(-19712.00), - SIMDE_FLOAT16_VALUE(19552.00), SIMDE_FLOAT16_VALUE(19392.00), - SIMDE_FLOAT16_VALUE(23232.00), SIMDE_FLOAT16_VALUE(24848.00)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), - SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), - SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), - SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, - {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), - SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), - SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), - SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, - {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), - SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-22752.00), SIMDE_FLOAT16_VALUE(-22192.00), - SIMDE_FLOAT16_VALUE(-24896.00), SIMDE_FLOAT16_VALUE(-23488.00), - SIMDE_FLOAT16_VALUE(10176.00), SIMDE_FLOAT16_VALUE(10136.00), - SIMDE_FLOAT16_VALUE(-4368.00), SIMDE_FLOAT16_VALUE(-4384.00)}}, - {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), - SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), - SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), - SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, - {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), - SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), - SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), - SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, - {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), - SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(6048.00), SIMDE_FLOAT16_VALUE(6248.00), - SIMDE_FLOAT16_VALUE(-10736.00), SIMDE_FLOAT16_VALUE(-10136.00), - SIMDE_FLOAT16_VALUE(-6560.00), SIMDE_FLOAT16_VALUE(-7284.00), - SIMDE_FLOAT16_VALUE(-6192.00), 
SIMDE_FLOAT16_VALUE(-7844.00)}}, - {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), - SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), - SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), - SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, - {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), - SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), - SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), - SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, - {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), - SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-1649.00), SIMDE_FLOAT16_VALUE(-964.50), - SIMDE_FLOAT16_VALUE(-7952.00), SIMDE_FLOAT16_VALUE(-7544.00), - SIMDE_FLOAT16_VALUE(2210.00), SIMDE_FLOAT16_VALUE(2376.00), - SIMDE_FLOAT16_VALUE(1928.00), SIMDE_FLOAT16_VALUE(1858.00)}}, - {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), - SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), - SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), - SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, - {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), - SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, - {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), - SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(25584.00), SIMDE_FLOAT16_VALUE(24592.00), - SIMDE_FLOAT16_VALUE(37472.00), SIMDE_FLOAT16_VALUE(36160.00), - SIMDE_FLOAT16_VALUE(7316.00), SIMDE_FLOAT16_VALUE(6884.00), - SIMDE_FLOAT16_VALUE(-33888.00), SIMDE_FLOAT16_VALUE(-35456.00)}}, - {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), - SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), - SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), - 
SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, - {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), - SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), - SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), - SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, - {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), - SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(12760.00), SIMDE_FLOAT16_VALUE(13128.00), - SIMDE_FLOAT16_VALUE(13552.00), SIMDE_FLOAT16_VALUE(14016.00), - SIMDE_FLOAT16_VALUE(6796.00), SIMDE_FLOAT16_VALUE(7348.00), - SIMDE_FLOAT16_VALUE(6552.00), SIMDE_FLOAT16_VALUE(5488.00)}} - + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 44096.00), SIMDE_FLOAT16_VALUE( 44064.00), SIMDE_FLOAT16_VALUE(-30272.00), SIMDE_FLOAT16_VALUE(-30032.00), + SIMDE_FLOAT16_VALUE( 10048.00), SIMDE_FLOAT16_VALUE( 10600.00), SIMDE_FLOAT16_VALUE(-27472.00), SIMDE_FLOAT16_VALUE(-26736.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), + 
SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -5168.00), SIMDE_FLOAT16_VALUE( -5008.00), SIMDE_FLOAT16_VALUE( 5108.00), SIMDE_FLOAT16_VALUE( 5228.00), + SIMDE_FLOAT16_VALUE(-32288.00), SIMDE_FLOAT16_VALUE(-32480.00), SIMDE_FLOAT16_VALUE(-44000.00), SIMDE_FLOAT16_VALUE(-43040.00) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-18432.00), SIMDE_FLOAT16_VALUE(-17616.00), SIMDE_FLOAT16_VALUE(-19232.00), SIMDE_FLOAT16_VALUE(-19712.00), + SIMDE_FLOAT16_VALUE( 19552.00), SIMDE_FLOAT16_VALUE( 19392.00), SIMDE_FLOAT16_VALUE( 23232.00), SIMDE_FLOAT16_VALUE( 24848.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { SIMDE_FLOAT16_VALUE( -280.75), 
SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-22752.00), SIMDE_FLOAT16_VALUE(-22192.00), SIMDE_FLOAT16_VALUE(-24896.00), SIMDE_FLOAT16_VALUE(-23488.00), + SIMDE_FLOAT16_VALUE( 10176.00), SIMDE_FLOAT16_VALUE( 10136.00), SIMDE_FLOAT16_VALUE( -4368.00), SIMDE_FLOAT16_VALUE( -4384.00) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 6048.00), SIMDE_FLOAT16_VALUE( 6248.00), SIMDE_FLOAT16_VALUE(-10736.00), SIMDE_FLOAT16_VALUE(-10136.00), + SIMDE_FLOAT16_VALUE( -6560.00), SIMDE_FLOAT16_VALUE( -7284.00), SIMDE_FLOAT16_VALUE( -6192.00), SIMDE_FLOAT16_VALUE( -7844.00) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -1649.00), SIMDE_FLOAT16_VALUE( -964.50), 
SIMDE_FLOAT16_VALUE( -7952.00), SIMDE_FLOAT16_VALUE( -7544.00), + SIMDE_FLOAT16_VALUE( 2210.00), SIMDE_FLOAT16_VALUE( 2376.00), SIMDE_FLOAT16_VALUE( 1928.00), SIMDE_FLOAT16_VALUE( 1858.00) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 25584.00), SIMDE_FLOAT16_VALUE( 24592.00), SIMDE_FLOAT16_VALUE( 37472.00), SIMDE_FLOAT16_VALUE( 36160.00), + SIMDE_FLOAT16_VALUE( 7316.00), SIMDE_FLOAT16_VALUE( 6884.00), SIMDE_FLOAT16_VALUE(-33888.00), SIMDE_FLOAT16_VALUE(-35456.00) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 12760.00), SIMDE_FLOAT16_VALUE( 13128.00), SIMDE_FLOAT16_VALUE( 13552.00), SIMDE_FLOAT16_VALUE( 14016.00), + SIMDE_FLOAT16_VALUE( 6796.00), SIMDE_FLOAT16_VALUE( 7348.00), SIMDE_FLOAT16_VALUE( 6552.00), 
SIMDE_FLOAT16_VALUE( 5488.00) } } }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_rot180_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot180_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -590,7 +444,8 @@ static int test_simde_vcmlaq_rot180_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot180_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -599,90 +454,66 @@ static int test_simde_vcmlaq_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - 
{{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), - SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, - {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), - SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, - {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(250351.875000), SIMDE_FLOAT32_C(250540.937500), - SIMDE_FLOAT32_C(-641627.000000), SIMDE_FLOAT32_C(-641620.500000)}}, - {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), - SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, - {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), - SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, - {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, - INT32_C(0), - {SIMDE_FLOAT32_C(78247.265625), SIMDE_FLOAT32_C(78436.890625), - SIMDE_FLOAT32_C(107704.531250), SIMDE_FLOAT32_C(106755.757812)}}, - {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), - SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, - {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), - SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, - {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, - INT32_C(0), - {SIMDE_FLOAT32_C(255449.343750), SIMDE_FLOAT32_C(255087.765625), - SIMDE_FLOAT32_C(-542620.625000), SIMDE_FLOAT32_C(-543163.250000)}}, - {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), - SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, - {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), - SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, - {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(64836.050781), SIMDE_FLOAT32_C(64934.960938), - SIMDE_FLOAT32_C(-93544.031250), SIMDE_FLOAT32_C(-94298.843750)}}, - {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), - SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, - {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), - SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, - {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-15087.788086), 
SIMDE_FLOAT32_C(-16904.587891), - SIMDE_FLOAT32_C(-8463.912109), SIMDE_FLOAT32_C(-8664.532227)}}, - {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), - SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, - {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), - SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, - {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-94916.125000), SIMDE_FLOAT32_C(-93724.765625), - SIMDE_FLOAT32_C(241852.218750), SIMDE_FLOAT32_C(241671.671875)}}, - {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), - SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, - {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), - SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, - {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-138462.234375), SIMDE_FLOAT32_C(-137421.078125), - SIMDE_FLOAT32_C(218469.250000), SIMDE_FLOAT32_C(219506.703125)}}, - {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), - SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, - {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), - SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, - {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, - INT32_C(0), - {SIMDE_FLOAT32_C(137911.859375), SIMDE_FLOAT32_C(139323.281250), - SIMDE_FLOAT32_C(32517.996094), SIMDE_FLOAT32_C(33383.957031)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(250351.875000), SIMDE_FLOAT32_C(250540.937500), SIMDE_FLOAT32_C(-641627.000000), SIMDE_FLOAT32_C(-641620.500000) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { 
SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(78247.265625), SIMDE_FLOAT32_C(78436.890625), SIMDE_FLOAT32_C(107704.531250), SIMDE_FLOAT32_C(106755.757812) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(255449.343750), SIMDE_FLOAT32_C(255087.765625), SIMDE_FLOAT32_C(-542620.625000), SIMDE_FLOAT32_C(-543163.250000) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(64836.050781), SIMDE_FLOAT32_C(64934.960938), SIMDE_FLOAT32_C(-93544.031250), SIMDE_FLOAT32_C(-94298.843750) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-15087.788086), SIMDE_FLOAT32_C(-16904.587891), SIMDE_FLOAT32_C(-8463.912109), SIMDE_FLOAT32_C(-8664.532227) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-94916.125000), SIMDE_FLOAT32_C(-93724.765625), 
SIMDE_FLOAT32_C(241852.218750), SIMDE_FLOAT32_C(241671.671875) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-138462.234375), SIMDE_FLOAT32_C(-137421.078125), SIMDE_FLOAT32_C(218469.250000), SIMDE_FLOAT32_C(219506.703125) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(137911.859375), SIMDE_FLOAT32_C(139323.281250), SIMDE_FLOAT32_C(32517.996094), SIMDE_FLOAT32_C(33383.957031) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot180_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -693,7 +524,8 @@ static int test_simde_vcmlaq_rot180_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot180_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -702,168 +534,99 @@ static int test_simde_vcmlaq_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), - SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), - SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), - SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, - {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), - SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), - SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), - SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, - {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), - SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), - SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), - SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-58464.00), SIMDE_FLOAT16_VALUE(-57792.00), - SIMDE_FLOAT16_VALUE(-3786.00), SIMDE_FLOAT16_VALUE(-4900.00), - SIMDE_FLOAT16_VALUE(-7016.00), SIMDE_FLOAT16_VALUE(-6296.00), - SIMDE_FLOAT16_VALUE(20384.00), SIMDE_FLOAT16_VALUE(21440.00)}}, - {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), - SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), - SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), - SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, - {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), - SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(79.00), SIMDE_FLOAT16_VALUE(84.00), - 
SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-96.00)}, - {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), - SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), - SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), - SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(12328.00), SIMDE_FLOAT16_VALUE(12344.00), - SIMDE_FLOAT16_VALUE(17360.00), SIMDE_FLOAT16_VALUE(15872.00), - SIMDE_FLOAT16_VALUE(-6872.00), SIMDE_FLOAT16_VALUE(-6292.00), - SIMDE_FLOAT16_VALUE(-39136.00), SIMDE_FLOAT16_VALUE(-39072.00)}}, - {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), - SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), - SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), - SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, - {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), - SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), - SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), - SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, - {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), - SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), - SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), - SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(14768.00), SIMDE_FLOAT16_VALUE(15392.00), - SIMDE_FLOAT16_VALUE(-29696.00), SIMDE_FLOAT16_VALUE(-29968.00), - SIMDE_FLOAT16_VALUE(25824.00), SIMDE_FLOAT16_VALUE(26768.00), - SIMDE_FLOAT16_VALUE(-6152.00), SIMDE_FLOAT16_VALUE(-7276.00)}}, - {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), - SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), - SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), - SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, - {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), - SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), - SIMDE_FLOAT16_VALUE(-342.00), 
SIMDE_FLOAT16_VALUE(580.50), - SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, - {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), - SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), - SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), - SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(-31696.00), SIMDE_FLOAT16_VALUE(-31104.00), - SIMDE_FLOAT16_VALUE(25248.00), SIMDE_FLOAT16_VALUE(25184.00), - SIMDE_FLOAT16_VALUE(11056.00), SIMDE_FLOAT16_VALUE(12648.00), - SIMDE_FLOAT16_VALUE(-17920.00), SIMDE_FLOAT16_VALUE(-18672.00)}}, - {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), - SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), - SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), - SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, - {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), - SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), - SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), - SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, - {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), - SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), - SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), - SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-1321.00), SIMDE_FLOAT16_VALUE(-2526.00), - SIMDE_FLOAT16_VALUE(-17408.00), SIMDE_FLOAT16_VALUE(-18400.00), - SIMDE_FLOAT16_VALUE(-3076.00), SIMDE_FLOAT16_VALUE(-4304.00), - SIMDE_FLOAT16_VALUE(12480.00), SIMDE_FLOAT16_VALUE(12248.00)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), - SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), - SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), - SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, - {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), - SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), - 
SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), - SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, - {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), - SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), - SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), - SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(6300.00), SIMDE_FLOAT16_VALUE(7084.00), - SIMDE_FLOAT16_VALUE(-11088.00), SIMDE_FLOAT16_VALUE(-11928.00), - SIMDE_FLOAT16_VALUE(-472.50), SIMDE_FLOAT16_VALUE(861.50), - SIMDE_FLOAT16_VALUE(12688.00), SIMDE_FLOAT16_VALUE(13272.00)}}, - {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), - SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), - SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), - SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, - {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), - SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), - SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, - {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), - SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), - SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), - SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(20064.00), SIMDE_FLOAT16_VALUE(20416.00), - SIMDE_FLOAT16_VALUE(-9352.00), SIMDE_FLOAT16_VALUE(-9184.00), - SIMDE_FLOAT16_VALUE(8368.00), SIMDE_FLOAT16_VALUE(8928.00), - SIMDE_FLOAT16_VALUE(-17744.00), SIMDE_FLOAT16_VALUE(-17952.00)}}, - {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), - SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), - SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, - {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), - SIMDE_FLOAT16_VALUE(406.25), 
SIMDE_FLOAT16_VALUE(439.50), - SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), - SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, - {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), - SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), - SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), - SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(14104.00), SIMDE_FLOAT16_VALUE(14224.00), - SIMDE_FLOAT16_VALUE(19424.00), SIMDE_FLOAT16_VALUE(19008.00), - SIMDE_FLOAT16_VALUE(-38688.00), SIMDE_FLOAT16_VALUE(-39520.00), - SIMDE_FLOAT16_VALUE(23344.00), SIMDE_FLOAT16_VALUE(24224.00)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-58464.00), SIMDE_FLOAT16_VALUE(-57792.00), SIMDE_FLOAT16_VALUE( -3786.00), SIMDE_FLOAT16_VALUE( -4900.00), + SIMDE_FLOAT16_VALUE( -7016.00), SIMDE_FLOAT16_VALUE( -6296.00), SIMDE_FLOAT16_VALUE( 20384.00), SIMDE_FLOAT16_VALUE( 21440.00) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), 
SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 12328.00), SIMDE_FLOAT16_VALUE( 12344.00), SIMDE_FLOAT16_VALUE( 17360.00), SIMDE_FLOAT16_VALUE( 15872.00), + SIMDE_FLOAT16_VALUE( -6872.00), SIMDE_FLOAT16_VALUE( -6292.00), SIMDE_FLOAT16_VALUE(-39136.00), SIMDE_FLOAT16_VALUE(-39072.00) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 14768.00), SIMDE_FLOAT16_VALUE( 15392.00), SIMDE_FLOAT16_VALUE(-29696.00), SIMDE_FLOAT16_VALUE(-29968.00), + SIMDE_FLOAT16_VALUE( 25824.00), SIMDE_FLOAT16_VALUE( 26768.00), SIMDE_FLOAT16_VALUE( -6152.00), SIMDE_FLOAT16_VALUE( -7276.00) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + 
SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-31696.00), SIMDE_FLOAT16_VALUE(-31104.00), SIMDE_FLOAT16_VALUE( 25248.00), SIMDE_FLOAT16_VALUE( 25184.00), + SIMDE_FLOAT16_VALUE( 11056.00), SIMDE_FLOAT16_VALUE( 12648.00), SIMDE_FLOAT16_VALUE(-17920.00), SIMDE_FLOAT16_VALUE(-18672.00) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -1321.00), SIMDE_FLOAT16_VALUE( -2526.00), SIMDE_FLOAT16_VALUE(-17408.00), SIMDE_FLOAT16_VALUE(-18400.00), + SIMDE_FLOAT16_VALUE( -3076.00), SIMDE_FLOAT16_VALUE( -4304.00), SIMDE_FLOAT16_VALUE( 12480.00), SIMDE_FLOAT16_VALUE( 12248.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 
164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 6300.00), SIMDE_FLOAT16_VALUE( 7084.00), SIMDE_FLOAT16_VALUE(-11088.00), SIMDE_FLOAT16_VALUE(-11928.00), + SIMDE_FLOAT16_VALUE( -472.50), SIMDE_FLOAT16_VALUE( 861.50), SIMDE_FLOAT16_VALUE( 12688.00), SIMDE_FLOAT16_VALUE( 13272.00) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 20064.00), SIMDE_FLOAT16_VALUE( 20416.00), SIMDE_FLOAT16_VALUE( -9352.00), SIMDE_FLOAT16_VALUE( -9184.00), + SIMDE_FLOAT16_VALUE( 8368.00), SIMDE_FLOAT16_VALUE( 8928.00), SIMDE_FLOAT16_VALUE(-17744.00), 
SIMDE_FLOAT16_VALUE(-17952.00) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE( 14104.00), SIMDE_FLOAT16_VALUE( 14224.00), SIMDE_FLOAT16_VALUE( 19424.00), SIMDE_FLOAT16_VALUE( 19008.00), + SIMDE_FLOAT16_VALUE(-38688.00), SIMDE_FLOAT16_VALUE(-39520.00), SIMDE_FLOAT16_VALUE( 23344.00), SIMDE_FLOAT16_VALUE( 24224.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - - simde_float16x8_t r; - SIMDE_CONSTIFY_4_( - simde_vcmlaq_rot180_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + SIMDE_CONSTIFY_4_(simde_vcmlaq_rot180_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 
; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -874,7 +637,8 @@ static int test_simde_vcmlaq_rot180_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot180_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -883,102 +647,67 @@ static int test_simde_vcmlaq_rot180_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), - SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, - {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), - SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, - {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), - SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-431133.437500), SIMDE_FLOAT32_C(-431318.968750), - SIMDE_FLOAT32_C(-239370.468750), SIMDE_FLOAT32_C(-239954.531250)}}, - {{SIMDE_FLOAT32_C(-57.67), SIMDE_FLOAT32_C(-897.07), - SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, - {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), - SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, - {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), - SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, - INT32_C(1), - {SIMDE_FLOAT32_C(185636.390625), SIMDE_FLOAT32_C(184797.000000), - SIMDE_FLOAT32_C(1511.212036), SIMDE_FLOAT32_C(1004.312012)}}, - {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), - SIMDE_FLOAT32_C(943.92), 
SIMDE_FLOAT32_C(628.48)}, - {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), - SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, - {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), - SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, - INT32_C(0), - {SIMDE_FLOAT32_C(241976.421875), SIMDE_FLOAT32_C(241236.812500), - SIMDE_FLOAT32_C(-187645.218750), SIMDE_FLOAT32_C(-187960.656250)}}, - {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), - SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, - {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), - SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, - {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), - SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, - INT32_C(1), - {SIMDE_FLOAT32_C(487437.437500), SIMDE_FLOAT32_C(485927.312500), - SIMDE_FLOAT32_C(586838.562500), SIMDE_FLOAT32_C(587489.312500)}}, - {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), - SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, - {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), - SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, - {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), - SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, - INT32_C(0), - {SIMDE_FLOAT32_C(328442.062500), SIMDE_FLOAT32_C(327982.156250), - SIMDE_FLOAT32_C(-164147.734375), SIMDE_FLOAT32_C(-164391.093750)}}, - {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), - SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, - {SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), - SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, - {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), - SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-710996.062500), SIMDE_FLOAT32_C(-710441.750000), - SIMDE_FLOAT32_C(-489586.718750), SIMDE_FLOAT32_C(-489546.875000)}}, - {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), - SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, - {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), - 
SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, - {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), - SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-791530.062500), SIMDE_FLOAT32_C(-792788.687500), - SIMDE_FLOAT32_C(191966.765625), SIMDE_FLOAT32_C(191790.843750)}}, - {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), - SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, - {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), - SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, - {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), - SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, - INT32_C(1), - {SIMDE_FLOAT32_C(153288.531250), SIMDE_FLOAT32_C(154205.937500), - SIMDE_FLOAT32_C(158087.750000), SIMDE_FLOAT32_C(159300.171875)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-431133.437500), SIMDE_FLOAT32_C(-431318.968750), SIMDE_FLOAT32_C(-239370.468750), SIMDE_FLOAT32_C(-239954.531250) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(185636.390625), SIMDE_FLOAT32_C(184797.000000), SIMDE_FLOAT32_C(1511.212036), SIMDE_FLOAT32_C(1004.312012) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( 
-500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(241976.421875), SIMDE_FLOAT32_C(241236.812500), SIMDE_FLOAT32_C(-187645.218750), SIMDE_FLOAT32_C(-187960.656250) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(487437.437500), SIMDE_FLOAT32_C(485927.312500), SIMDE_FLOAT32_C(586838.562500), SIMDE_FLOAT32_C(587489.312500) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(328442.062500), SIMDE_FLOAT32_C(327982.156250), SIMDE_FLOAT32_C(-164147.734375), SIMDE_FLOAT32_C(-164391.093750) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-710996.062500), SIMDE_FLOAT32_C(-710441.750000), SIMDE_FLOAT32_C(-489586.718750), SIMDE_FLOAT32_C(-489546.875000) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( 
-208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-791530.062500), SIMDE_FLOAT32_C(-792788.687500), SIMDE_FLOAT32_C(191966.765625), SIMDE_FLOAT32_C(191790.843750) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(153288.531250), SIMDE_FLOAT32_C(154205.937500), SIMDE_FLOAT32_C(158087.750000), SIMDE_FLOAT32_C(159300.171875) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_rot180_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot180_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = 
simde_vcmlaq_rot180_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); diff --git a/test/arm/neon/cmla_rot270_lane.c b/test/arm/neon/cmla_rot270_lane.c index 488c523f6..0b09c4891 100644 --- a/test/arm/neon/cmla_rot270_lane.c +++ b/test/arm/neon/cmla_rot270_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_rot270_lane +#include "test-neon.h" #include "../../../simde/arm/neon/cmla_rot270_lane.h" - #include "../../../simde/arm/neon/dup_n.h" -#include "test-neon.h" -static int test_simde_vcmla_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -14,103 +14,71 @@ static int test_simde_vcmla_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), - SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, - {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), - SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(389.25), SIMDE_FLOAT16_VALUE(-547.50), - SIMDE_FLOAT16_VALUE(-1965.00), SIMDE_FLOAT16_VALUE(771.00)}}, - {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-14.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(61.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-133.12), SIMDE_FLOAT16_VALUE(966.00), - SIMDE_FLOAT16_VALUE(43968.00), 
SIMDE_FLOAT16_VALUE(-43456.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-61.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(58720.00), SIMDE_FLOAT16_VALUE(-59360.00), - SIMDE_FLOAT16_VALUE(-27488.00), SIMDE_FLOAT16_VALUE(27264.00)}}, - {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), - SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, - {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), - SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), - SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(30992.00), SIMDE_FLOAT16_VALUE(-31104.00), - SIMDE_FLOAT16_VALUE(-18160.00), SIMDE_FLOAT16_VALUE(17840.00)}}, - {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), - SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, - {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), - SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, - {SIMDE_FLOAT16_VALUE(-80.50), SIMDE_FLOAT16_VALUE(375.50), - SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(16024.00), SIMDE_FLOAT16_VALUE(-15864.00), - SIMDE_FLOAT16_VALUE(42528.00), SIMDE_FLOAT16_VALUE(-42528.00)}}, - {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, - {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, - {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), - SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-1876.00), 
SIMDE_FLOAT16_VALUE(1769.00), - SIMDE_FLOAT16_VALUE(46432.00), SIMDE_FLOAT16_VALUE(-46208.00)}}, - {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, - {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(15128.00), SIMDE_FLOAT16_VALUE(-15168.00), - SIMDE_FLOAT16_VALUE(1695.00), SIMDE_FLOAT16_VALUE(-1617.00)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, - {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, - {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), - SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-14904.00), SIMDE_FLOAT16_VALUE(13320.00), - SIMDE_FLOAT16_VALUE(12312.00), SIMDE_FLOAT16_VALUE(-11272.00)}} - + { + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 389.25), SIMDE_FLOAT16_VALUE( -547.50), SIMDE_FLOAT16_VALUE( -1965.00), SIMDE_FLOAT16_VALUE( 771.00) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 
115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -133.12), SIMDE_FLOAT16_VALUE( 966.00), SIMDE_FLOAT16_VALUE( 43968.00), SIMDE_FLOAT16_VALUE(-43456.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 58720.00), SIMDE_FLOAT16_VALUE(-59360.00), SIMDE_FLOAT16_VALUE(-27488.00), SIMDE_FLOAT16_VALUE( 27264.00) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 30992.00), SIMDE_FLOAT16_VALUE(-31104.00), SIMDE_FLOAT16_VALUE(-18160.00), SIMDE_FLOAT16_VALUE( 17840.00) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 16024.00), SIMDE_FLOAT16_VALUE(-15864.00), SIMDE_FLOAT16_VALUE( 42528.00), SIMDE_FLOAT16_VALUE(-42528.00) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 
-271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -1876.00), SIMDE_FLOAT16_VALUE( 1769.00), SIMDE_FLOAT16_VALUE( 46432.00), SIMDE_FLOAT16_VALUE(-46208.00) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 15128.00), SIMDE_FLOAT16_VALUE(-15168.00), SIMDE_FLOAT16_VALUE( 1695.00), SIMDE_FLOAT16_VALUE( -1617.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-14904.00), SIMDE_FLOAT16_VALUE( 13320.00), SIMDE_FLOAT16_VALUE( 12312.00), SIMDE_FLOAT16_VALUE(-11272.00) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot270_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), 
test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); - } + } + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot270_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -121,7 +89,8 @@ static int test_simde_vcmla_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -130,48 +99,49 @@ static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, - {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, - {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-526088.812500), SIMDE_FLOAT32_C(526589.937500)}}, - {{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, - {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, - {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-50512.480469), SIMDE_FLOAT32_C(50272.230469)}}, - {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, - {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, - {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-448612.062500), SIMDE_FLOAT32_C(448311.968750)}}, - 
{{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, - {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, - {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, - INT32_C(0), - {SIMDE_FLOAT32_C(56657.046875), SIMDE_FLOAT32_C(-56971.597656)}}, - {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, - {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, - {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-12129.276367), SIMDE_FLOAT32_C(11783.416992)}}, - {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, - {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, - {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, - INT32_C(0), - {SIMDE_FLOAT32_C(31953.617188), SIMDE_FLOAT32_C(-31670.166016)}}, - {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, - {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, - {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, - INT32_C(0), - {SIMDE_FLOAT32_C(311662.375000), SIMDE_FLOAT32_C(-312512.375000)}}, - {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, - {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, - {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, - INT32_C(0), - {SIMDE_FLOAT32_C(180180.687500), SIMDE_FLOAT32_C(-180607.796875)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-526088.812500), SIMDE_FLOAT32_C(526589.937500) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-50512.480469), SIMDE_FLOAT32_C(50272.230469) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), 
SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-448612.062500), SIMDE_FLOAT32_C(448311.968750) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(56657.046875), SIMDE_FLOAT32_C(-56971.597656) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-12129.276367), SIMDE_FLOAT32_C(11783.416992) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(31953.617188), SIMDE_FLOAT32_C(-31670.166016) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(311662.375000), SIMDE_FLOAT32_C(-312512.375000) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(180180.687500), SIMDE_FLOAT32_C(-180607.796875) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); @@ -179,17 +149,18 @@ static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 
0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot270_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -200,7 +171,9 @@ static int test_simde_vcmla_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { + +static int +test_simde_vcmla_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -209,118 +182,78 @@ static int test_simde_vcmla_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), - SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, - {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), - SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), - SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), - SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(12392.00), SIMDE_FLOAT16_VALUE(-10800.00), - SIMDE_FLOAT16_VALUE(-23712.00), SIMDE_FLOAT16_VALUE(23888.00)}}, - {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), - SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), - 
SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), - SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(5536.00), SIMDE_FLOAT16_VALUE(-5360.00), - SIMDE_FLOAT16_VALUE(2150.00), SIMDE_FLOAT16_VALUE(-1565.00)}}, - {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), - SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, - {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), - SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, - {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), - SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), - SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), - SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-5512.00), SIMDE_FLOAT16_VALUE(4924.00), - SIMDE_FLOAT16_VALUE(7552.00), SIMDE_FLOAT16_VALUE(-7336.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-44.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-66.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(85.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), - SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-5864.00), SIMDE_FLOAT16_VALUE(5220.00), - SIMDE_FLOAT16_VALUE(39616.00), SIMDE_FLOAT16_VALUE(-39648.00)}}, - {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), - SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, - {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), - SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), - SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), - SIMDE_FLOAT16_VALUE(-209.25), 
SIMDE_FLOAT16_VALUE(-856.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-8328.00), SIMDE_FLOAT16_VALUE(7940.00), - SIMDE_FLOAT16_VALUE(5928.00), SIMDE_FLOAT16_VALUE(-5572.00)}}, - {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), - SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, - {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), - SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, - {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), - SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), - SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), - SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-15368.00), SIMDE_FLOAT16_VALUE(16144.00), - SIMDE_FLOAT16_VALUE(-27904.00), SIMDE_FLOAT16_VALUE(27440.00)}}, - {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), - SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, - {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), - SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, - {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), - SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), - SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), - SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-25872.00), SIMDE_FLOAT16_VALUE(26368.00), - SIMDE_FLOAT16_VALUE(-21152.00), SIMDE_FLOAT16_VALUE(21104.00)}}, - {{SIMDE_FLOAT16_VALUE(648.50), SIMDE_FLOAT16_VALUE(385.25), - SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, - {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), - SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, - {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), - SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), - SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-17792.00), 
SIMDE_FLOAT16_VALUE(18816.00), - SIMDE_FLOAT16_VALUE(9000.00), SIMDE_FLOAT16_VALUE(-9312.00)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 12392.00), SIMDE_FLOAT16_VALUE(-10800.00), SIMDE_FLOAT16_VALUE(-23712.00), SIMDE_FLOAT16_VALUE( 23888.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 5536.00), SIMDE_FLOAT16_VALUE( -5360.00), SIMDE_FLOAT16_VALUE( 2150.00), SIMDE_FLOAT16_VALUE( -1565.00) } }, + { { SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + 
INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -5512.00), SIMDE_FLOAT16_VALUE( 4924.00), SIMDE_FLOAT16_VALUE( 7552.00), SIMDE_FLOAT16_VALUE( -7336.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -5864.00), SIMDE_FLOAT16_VALUE( 5220.00), SIMDE_FLOAT16_VALUE( 39616.00), SIMDE_FLOAT16_VALUE(-39648.00) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -8328.00), SIMDE_FLOAT16_VALUE( 7940.00), SIMDE_FLOAT16_VALUE( 5928.00), SIMDE_FLOAT16_VALUE( -5572.00) } }, + { { SIMDE_FLOAT16_VALUE( 251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { 
SIMDE_FLOAT16_VALUE(-15368.00), SIMDE_FLOAT16_VALUE( 16144.00), SIMDE_FLOAT16_VALUE(-27904.00), SIMDE_FLOAT16_VALUE( 27440.00) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-25872.00), SIMDE_FLOAT16_VALUE( 26368.00), SIMDE_FLOAT16_VALUE(-21152.00), SIMDE_FLOAT16_VALUE( 21104.00) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-17792.00), SIMDE_FLOAT16_VALUE( 18816.00), SIMDE_FLOAT16_VALUE( 9000.00), SIMDE_FLOAT16_VALUE( -9312.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - + simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot270_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), 
test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot270_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -331,7 +264,8 @@ static int test_simde_vcmla_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -340,77 +274,65 @@ static int test_simde_vcmla_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, - {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, - {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), - SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, - INT32_C(0), - {SIMDE_FLOAT32_C(610637.625000), SIMDE_FLOAT32_C(-611026.000000)}}, - {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, - {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, - {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), - SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-6366.252441), SIMDE_FLOAT32_C(5428.512207)}}, - {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, - {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, - {SIMDE_FLOAT32_C(139.96), 
SIMDE_FLOAT32_C(859.14), - SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-36916.816406), SIMDE_FLOAT32_C(37517.988281)}}, - {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, - {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, - {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), - SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-235786.640625), SIMDE_FLOAT32_C(237312.234375)}}, - {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, - {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, - {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), - SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, - INT32_C(0), - {SIMDE_FLOAT32_C(105068.734375), SIMDE_FLOAT32_C(-104790.312500)}}, - {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, - {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, - {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), - SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, - INT32_C(1), - {SIMDE_FLOAT32_C(90468.750000), SIMDE_FLOAT32_C(-89659.671875)}}, - {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, - {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, - {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), - SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-417438.093750), SIMDE_FLOAT32_C(417206.593750)}}, - {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, - {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, - {SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), - SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-33583.660156), SIMDE_FLOAT32_C(33966.328125)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { 
SIMDE_FLOAT32_C(610637.625000), SIMDE_FLOAT32_C(-611026.000000) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-6366.252441), SIMDE_FLOAT32_C(5428.512207) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-36916.816406), SIMDE_FLOAT32_C(37517.988281) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-235786.640625), SIMDE_FLOAT32_C(237312.234375) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(105068.734375), SIMDE_FLOAT32_C(-104790.312500) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(90468.750000), SIMDE_FLOAT32_C(-89659.671875) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-417438.093750), SIMDE_FLOAT32_C(417206.593750) } }, + { { SIMDE_FLOAT32_C( 
104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-33583.660156), SIMDE_FLOAT32_C(33966.328125) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot270_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot270_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot270_laneq_f32(r_, a, b, lanes[i]); - simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -421,7 +343,8 @@ static int test_simde_vcmla_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot270_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -430,153 +353,94 @@ static int 
test_simde_vcmlaq_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - - {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), - SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), - SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, - {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), - SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), - SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), - SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, - {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), - SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(32992.00), SIMDE_FLOAT16_VALUE(-32592.00), - SIMDE_FLOAT16_VALUE(-9224.00), SIMDE_FLOAT16_VALUE(9824.00), - SIMDE_FLOAT16_VALUE(15512.00), SIMDE_FLOAT16_VALUE(-16368.00), - SIMDE_FLOAT16_VALUE(22288.00), SIMDE_FLOAT16_VALUE(-22320.00)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), - SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, - {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-40.00), - SIMDE_FLOAT16_VALUE(-52.00), SIMDE_FLOAT16_VALUE(75.88), - SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), - SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(43.50)}, - {SIMDE_FLOAT16_VALUE(-976.00), SIMDE_FLOAT16_VALUE(89.44), - SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-4448.00), SIMDE_FLOAT16_VALUE(2868.00), - SIMDE_FLOAT16_VALUE(7244.00), SIMDE_FLOAT16_VALUE(-6208.00), - SIMDE_FLOAT16_VALUE(28208.00), SIMDE_FLOAT16_VALUE(-29968.00), - SIMDE_FLOAT16_VALUE(3320.00), SIMDE_FLOAT16_VALUE(-3522.00)}}, - {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), - SIMDE_FLOAT16_VALUE(-377.00), 
SIMDE_FLOAT16_VALUE(-853.00), - SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), - SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, - {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), - SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), - SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), - SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(19280.00), SIMDE_FLOAT16_VALUE(-20416.00), - SIMDE_FLOAT16_VALUE(13656.00), SIMDE_FLOAT16_VALUE(-14888.00), - SIMDE_FLOAT16_VALUE(-9592.00), SIMDE_FLOAT16_VALUE(11080.00), - SIMDE_FLOAT16_VALUE(7384.00), SIMDE_FLOAT16_VALUE(-7208.00)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), - SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), - SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), - SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, - {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), - SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), - SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), - SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, - {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), - SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-12696.00), SIMDE_FLOAT16_VALUE(12008.00), - SIMDE_FLOAT16_VALUE(-14984.00), SIMDE_FLOAT16_VALUE(15200.00), - SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE(21360.00), - SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE(29168.00)}}, - {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), - SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), - SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), - SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, - {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), - 
SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), - SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), - SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, - {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), - SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-2078.00), SIMDE_FLOAT16_VALUE(1231.00), - SIMDE_FLOAT16_VALUE(8512.00), SIMDE_FLOAT16_VALUE(-9688.00), - SIMDE_FLOAT16_VALUE(-6960.00), SIMDE_FLOAT16_VALUE(6000.00), - SIMDE_FLOAT16_VALUE(3292.00), SIMDE_FLOAT16_VALUE(-3614.00)}}, - {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), - SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), - SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), - SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, - {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), - SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), - SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), - SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, - {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), - SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-311.50), SIMDE_FLOAT16_VALUE(117.25), - SIMDE_FLOAT16_VALUE(-1865.00), SIMDE_FLOAT16_VALUE(2496.00), - SIMDE_FLOAT16_VALUE(-7524.00), SIMDE_FLOAT16_VALUE(7860.00), - SIMDE_FLOAT16_VALUE(-1605.00), SIMDE_FLOAT16_VALUE(174.75)}}, - {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), - SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), - SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), - SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, - {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), - SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, - {SIMDE_FLOAT16_VALUE(49.38), 
SIMDE_FLOAT16_VALUE(-363.00), - SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE(29440.00), - SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE(47168.00), - SIMDE_FLOAT16_VALUE(16880.00), SIMDE_FLOAT16_VALUE(-16944.00), - SIMDE_FLOAT16_VALUE(6696.00), SIMDE_FLOAT16_VALUE(-6416.00)}}, - {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), - SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), - SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), - SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, - {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), - SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), - SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), - SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, - {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), - SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(12864.00), SIMDE_FLOAT16_VALUE(-13944.00), - SIMDE_FLOAT16_VALUE(-6912.00), SIMDE_FLOAT16_VALUE(7908.00), - SIMDE_FLOAT16_VALUE(-13744.00), SIMDE_FLOAT16_VALUE(13360.00), - SIMDE_FLOAT16_VALUE(-5964.00), SIMDE_FLOAT16_VALUE(5044.00)}} - + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 32992.00), SIMDE_FLOAT16_VALUE(-32592.00), SIMDE_FLOAT16_VALUE( 
-9224.00), SIMDE_FLOAT16_VALUE( 9824.00), + SIMDE_FLOAT16_VALUE( 15512.00), SIMDE_FLOAT16_VALUE(-16368.00), SIMDE_FLOAT16_VALUE( 22288.00), SIMDE_FLOAT16_VALUE(-22320.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -4448.00), SIMDE_FLOAT16_VALUE( 2868.00), SIMDE_FLOAT16_VALUE( 7244.00), SIMDE_FLOAT16_VALUE( -6208.00), + SIMDE_FLOAT16_VALUE( 28208.00), SIMDE_FLOAT16_VALUE(-29968.00), SIMDE_FLOAT16_VALUE( 3320.00), SIMDE_FLOAT16_VALUE( -3522.00) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 19280.00), SIMDE_FLOAT16_VALUE(-20416.00), SIMDE_FLOAT16_VALUE( 13656.00), SIMDE_FLOAT16_VALUE(-14888.00), + SIMDE_FLOAT16_VALUE( -9592.00), SIMDE_FLOAT16_VALUE( 11080.00), SIMDE_FLOAT16_VALUE( 7384.00), SIMDE_FLOAT16_VALUE( -7208.00) } }, + { { 
SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-12696.00), SIMDE_FLOAT16_VALUE( 12008.00), SIMDE_FLOAT16_VALUE(-14984.00), SIMDE_FLOAT16_VALUE( 15200.00), + SIMDE_FLOAT16_VALUE(-19680.00), SIMDE_FLOAT16_VALUE( 21360.00), SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE( 29168.00) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -2078.00), SIMDE_FLOAT16_VALUE( 1231.00), SIMDE_FLOAT16_VALUE( 8512.00), SIMDE_FLOAT16_VALUE( -9688.00), + SIMDE_FLOAT16_VALUE( -6960.00), SIMDE_FLOAT16_VALUE( 6000.00), SIMDE_FLOAT16_VALUE( 3292.00), SIMDE_FLOAT16_VALUE( -3614.00) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 
250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -311.50), SIMDE_FLOAT16_VALUE( 117.25), SIMDE_FLOAT16_VALUE( -1865.00), SIMDE_FLOAT16_VALUE( 2496.00), + SIMDE_FLOAT16_VALUE( -7524.00), SIMDE_FLOAT16_VALUE( 7860.00), SIMDE_FLOAT16_VALUE( -1605.00), SIMDE_FLOAT16_VALUE( 174.75) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-30304.00), SIMDE_FLOAT16_VALUE( 29440.00), SIMDE_FLOAT16_VALUE(-47744.00), SIMDE_FLOAT16_VALUE( 47168.00), + SIMDE_FLOAT16_VALUE( 16880.00), SIMDE_FLOAT16_VALUE(-16944.00), SIMDE_FLOAT16_VALUE( 6696.00), SIMDE_FLOAT16_VALUE( -6416.00) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), 
SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 12864.00), SIMDE_FLOAT16_VALUE(-13944.00), SIMDE_FLOAT16_VALUE( -6912.00), SIMDE_FLOAT16_VALUE( 7908.00), + SIMDE_FLOAT16_VALUE(-13744.00), SIMDE_FLOAT16_VALUE( 13360.00), SIMDE_FLOAT16_VALUE( -5964.00), SIMDE_FLOAT16_VALUE( 5044.00) } } + }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_rot270_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); - } + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot270_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -587,7 +451,8 @@ static int test_simde_vcmlaq_rot270_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot270_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -596,90 +461,66 @@ static int test_simde_vcmlaq_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), - SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, - {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), - SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, - {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-750463.375000), SIMDE_FLOAT32_C(751255.687500), - SIMDE_FLOAT32_C(193276.718750), SIMDE_FLOAT32_C(-194759.546875)}}, - {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), - SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, - {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), - SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, - {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, - INT32_C(0), - {SIMDE_FLOAT32_C(1763.281738), SIMDE_FLOAT32_C(-1649.681763), - SIMDE_FLOAT32_C(-90054.617188), SIMDE_FLOAT32_C(89508.742188)}}, - {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), - SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, - {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), - SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, - {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-682739.875000), SIMDE_FLOAT32_C(683590.000000), - SIMDE_FLOAT32_C(561822.437500), SIMDE_FLOAT32_C(-563218.187500)}}, - {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), - SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, - {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), - SIMDE_FLOAT32_C(514.14), 
SIMDE_FLOAT32_C(-660.86)}, - {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(110646.710938), SIMDE_FLOAT32_C(-111760.718750), - SIMDE_FLOAT32_C(-120219.195312), SIMDE_FLOAT32_C(119327.898438)}}, - {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), - SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, - {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), - SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, - {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-14330.849609), SIMDE_FLOAT32_C(14417.729492), - SIMDE_FLOAT32_C(10193.823242), SIMDE_FLOAT32_C(-8453.263672)}}, - {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), - SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, - {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), - SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, - {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, - INT32_C(0), - {SIMDE_FLOAT32_C(698546.187500), SIMDE_FLOAT32_C(-698628.187500), - SIMDE_FLOAT32_C(50247.367188), SIMDE_FLOAT32_C(-51198.714844)}}, - {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), - SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, - {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), - SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, - {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-78157.531250), SIMDE_FLOAT32_C(78774.859375), - SIMDE_FLOAT32_C(4187.824707), SIMDE_FLOAT32_C(-4100.624512)}}, - {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), - SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, - {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), - SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, - {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, - INT32_C(0), - {SIMDE_FLOAT32_C(351994.031250), SIMDE_FLOAT32_C(-352372.625000), - SIMDE_FLOAT32_C(-347034.812500), SIMDE_FLOAT32_C(347561.687500)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { 
SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-750463.375000), SIMDE_FLOAT32_C(751255.687500), SIMDE_FLOAT32_C(193276.718750), SIMDE_FLOAT32_C(-194759.546875) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(1763.281738), SIMDE_FLOAT32_C(-1649.681763), SIMDE_FLOAT32_C(-90054.617188), SIMDE_FLOAT32_C(89508.742188) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-682739.875000), SIMDE_FLOAT32_C(683590.000000), SIMDE_FLOAT32_C(561822.437500), SIMDE_FLOAT32_C(-563218.187500) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(110646.710938), SIMDE_FLOAT32_C(-111760.718750), SIMDE_FLOAT32_C(-120219.195312), SIMDE_FLOAT32_C(119327.898438) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), 
SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-14330.849609), SIMDE_FLOAT32_C(14417.729492), SIMDE_FLOAT32_C(10193.823242), SIMDE_FLOAT32_C(-8453.263672) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(698546.187500), SIMDE_FLOAT32_C(-698628.187500), SIMDE_FLOAT32_C(50247.367188), SIMDE_FLOAT32_C(-51198.714844) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-78157.531250), SIMDE_FLOAT32_C(78774.859375), SIMDE_FLOAT32_C(4187.824707), SIMDE_FLOAT32_C(-4100.624512) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(351994.031250), SIMDE_FLOAT32_C(-352372.625000), SIMDE_FLOAT32_C(-347034.812500), SIMDE_FLOAT32_C(347561.687500) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot270_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const 
int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -690,7 +531,8 @@ static int test_simde_vcmlaq_rot270_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot270_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -699,167 +541,102 @@ static int test_simde_vcmlaq_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), - SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), - SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), - SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, - {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), - SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), - SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), - SIMDE_FLOAT16_VALUE(-361.75), SIMDE_FLOAT16_VALUE(265.25)}, - {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), - SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), - SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), - SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(31024.00), SIMDE_FLOAT16_VALUE(-30432.00), - SIMDE_FLOAT16_VALUE(56384.00), SIMDE_FLOAT16_VALUE(-55808.00), - SIMDE_FLOAT16_VALUE(-14528.00), 
SIMDE_FLOAT16_VALUE(15520.00), - SIMDE_FLOAT16_VALUE(14728.00), SIMDE_FLOAT16_VALUE(-15344.00)}}, - {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), - SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), - SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), - SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, - {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), - SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(79.00), SIMDE_FLOAT16_VALUE(84.00), - SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-96.00)}, - {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), - SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), - SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), - SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-5848.00), SIMDE_FLOAT16_VALUE(6660.00), - SIMDE_FLOAT16_VALUE(-47776.00), SIMDE_FLOAT16_VALUE(47296.00), - SIMDE_FLOAT16_VALUE(5920.00), SIMDE_FLOAT16_VALUE(-6688.00), - SIMDE_FLOAT16_VALUE(-7976.00), SIMDE_FLOAT16_VALUE(7160.00)}}, - {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), - SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), - SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), - SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, - {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), - SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), - SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), - SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, - {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), - SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), - SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), - SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(1708.00), SIMDE_FLOAT16_VALUE(-2544.00), - SIMDE_FLOAT16_VALUE(-9592.00), SIMDE_FLOAT16_VALUE(9160.00), - 
SIMDE_FLOAT16_VALUE(13560.00), SIMDE_FLOAT16_VALUE(-13088.00), - SIMDE_FLOAT16_VALUE(9080.00), SIMDE_FLOAT16_VALUE(-8376.00)}}, - {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), - SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), - SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), - SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, - {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), - SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), - SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), - SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, - {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), - SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), - SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), - SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(25952.00), SIMDE_FLOAT16_VALUE(-27136.00), - SIMDE_FLOAT16_VALUE(19760.00), SIMDE_FLOAT16_VALUE(-21008.00), - SIMDE_FLOAT16_VALUE(19408.00), SIMDE_FLOAT16_VALUE(-19648.00), - SIMDE_FLOAT16_VALUE(-22720.00), SIMDE_FLOAT16_VALUE(23504.00)}}, - {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), - SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), - SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), - SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, - {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), - SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), - SIMDE_FLOAT16_VALUE(-187.25), SIMDE_FLOAT16_VALUE(422.75), - SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, - {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), - SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), - SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), - SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(20016.00), SIMDE_FLOAT16_VALUE(-20176.00), - SIMDE_FLOAT16_VALUE(-16928.00), 
SIMDE_FLOAT16_VALUE(16912.00), - SIMDE_FLOAT16_VALUE(-8096.00), SIMDE_FLOAT16_VALUE(8696.00), - SIMDE_FLOAT16_VALUE(15264.00), SIMDE_FLOAT16_VALUE(-16296.00)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), - SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), - SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), - SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, - {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), - SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), - SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), - SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, - {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), - SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), - SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), - SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(7588.00), SIMDE_FLOAT16_VALUE(-8040.00), - SIMDE_FLOAT16_VALUE(5176.00), SIMDE_FLOAT16_VALUE(-5404.00), - SIMDE_FLOAT16_VALUE(-10008.00), SIMDE_FLOAT16_VALUE(10136.00), - SIMDE_FLOAT16_VALUE(5884.00), SIMDE_FLOAT16_VALUE(-5212.00)}}, - {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), - SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), - SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), - SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, - {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), - SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), - SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, - {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), - SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), - SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), - SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(-20272.00), SIMDE_FLOAT16_VALUE(18976.00), - 
SIMDE_FLOAT16_VALUE(-9352.00), SIMDE_FLOAT16_VALUE(8456.00), - SIMDE_FLOAT16_VALUE(20848.00), SIMDE_FLOAT16_VALUE(-20016.00), - SIMDE_FLOAT16_VALUE(-21536.00), SIMDE_FLOAT16_VALUE(22720.00)}}, - {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), - SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), - SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, - {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), - SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), - SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), - SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, - {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), - SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), - SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), - SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(-32080.00), SIMDE_FLOAT16_VALUE(32624.00), - SIMDE_FLOAT16_VALUE(-20320.00), SIMDE_FLOAT16_VALUE(20560.00), - SIMDE_FLOAT16_VALUE(-34272.00), SIMDE_FLOAT16_VALUE(33824.00), - SIMDE_FLOAT16_VALUE(-43872.00), SIMDE_FLOAT16_VALUE(44544.00)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), 
SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 31024.00), SIMDE_FLOAT16_VALUE(-30432.00), SIMDE_FLOAT16_VALUE( 56384.00), SIMDE_FLOAT16_VALUE(-55808.00), + SIMDE_FLOAT16_VALUE(-14528.00), SIMDE_FLOAT16_VALUE( 15520.00), SIMDE_FLOAT16_VALUE( 14728.00), SIMDE_FLOAT16_VALUE(-15344.00) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -5848.00), SIMDE_FLOAT16_VALUE( 6660.00), SIMDE_FLOAT16_VALUE(-47776.00), SIMDE_FLOAT16_VALUE( 47296.00), + SIMDE_FLOAT16_VALUE( 5920.00), SIMDE_FLOAT16_VALUE( -6688.00), SIMDE_FLOAT16_VALUE( -7976.00), SIMDE_FLOAT16_VALUE( 7160.00) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( 
-705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 1708.00), SIMDE_FLOAT16_VALUE( -2544.00), SIMDE_FLOAT16_VALUE( -9592.00), SIMDE_FLOAT16_VALUE( 9160.00), + SIMDE_FLOAT16_VALUE( 13560.00), SIMDE_FLOAT16_VALUE(-13088.00), SIMDE_FLOAT16_VALUE( 9080.00), SIMDE_FLOAT16_VALUE( -8376.00) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE( 25952.00), SIMDE_FLOAT16_VALUE(-27136.00), SIMDE_FLOAT16_VALUE( 19760.00), SIMDE_FLOAT16_VALUE(-21008.00), + SIMDE_FLOAT16_VALUE( 19408.00), SIMDE_FLOAT16_VALUE(-19648.00), SIMDE_FLOAT16_VALUE(-22720.00), SIMDE_FLOAT16_VALUE( 23504.00) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( -505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), 
SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 20016.00), SIMDE_FLOAT16_VALUE(-20176.00), SIMDE_FLOAT16_VALUE(-16928.00), SIMDE_FLOAT16_VALUE( 16912.00), + SIMDE_FLOAT16_VALUE( -8096.00), SIMDE_FLOAT16_VALUE( 8696.00), SIMDE_FLOAT16_VALUE( 15264.00), SIMDE_FLOAT16_VALUE(-16296.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 7588.00), SIMDE_FLOAT16_VALUE( -8040.00), SIMDE_FLOAT16_VALUE( 5176.00), SIMDE_FLOAT16_VALUE( -5404.00), + SIMDE_FLOAT16_VALUE(-10008.00), SIMDE_FLOAT16_VALUE( 10136.00), SIMDE_FLOAT16_VALUE( 5884.00), SIMDE_FLOAT16_VALUE( -5212.00) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 
966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE(-20272.00), SIMDE_FLOAT16_VALUE( 18976.00), SIMDE_FLOAT16_VALUE( -9352.00), SIMDE_FLOAT16_VALUE( 8456.00), + SIMDE_FLOAT16_VALUE( 20848.00), SIMDE_FLOAT16_VALUE(-20016.00), SIMDE_FLOAT16_VALUE(-21536.00), SIMDE_FLOAT16_VALUE( 22720.00) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-32080.00), SIMDE_FLOAT16_VALUE( 32624.00), SIMDE_FLOAT16_VALUE(-20320.00), SIMDE_FLOAT16_VALUE( 20560.00), + SIMDE_FLOAT16_VALUE(-34272.00), SIMDE_FLOAT16_VALUE( 33824.00), SIMDE_FLOAT16_VALUE(-43872.00), SIMDE_FLOAT16_VALUE( 44544.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); - - simde_float16x8_t r; - SIMDE_CONSTIFY_4_( - simde_vcmlaq_rot270_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, 
b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + + simde_float16x8_t r; + SIMDE_CONSTIFY_4_(simde_vcmlaq_rot270_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -870,7 +647,8 @@ static int test_simde_vcmlaq_rot270_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot270_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -879,102 +657,67 @@ static int test_simde_vcmlaq_rot270_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), - SIMDE_FLOAT32_C(116.87), SIMDE_FLOAT32_C(-467.19)}, - {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), - SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, - {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), - SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(839681.750000), SIMDE_FLOAT32_C(-839156.937500), - SIMDE_FLOAT32_C(782495.375000), SIMDE_FLOAT32_C(-782845.687500)}}, - {{SIMDE_FLOAT32_C(-57.67), 
SIMDE_FLOAT32_C(-897.07), - SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, - {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), - SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, - {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), - SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, - INT32_C(1), - {SIMDE_FLOAT32_C(82345.062500), SIMDE_FLOAT32_C(-83299.804688), - SIMDE_FLOAT32_C(105892.781250), SIMDE_FLOAT32_C(-106161.718750)}}, - {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), - SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, - {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), - SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, - {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), - SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, - INT32_C(0), - {SIMDE_FLOAT32_C(271323.000000), SIMDE_FLOAT32_C(-272501.656250), - SIMDE_FLOAT32_C(-439964.968750), SIMDE_FLOAT32_C(441537.375000)}}, - {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), - SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, - {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), - SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, - {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), - SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, - INT32_C(1), - {SIMDE_FLOAT32_C(736097.062500), SIMDE_FLOAT32_C(-736041.000000), - SIMDE_FLOAT32_C(-755055.625000), SIMDE_FLOAT32_C(754533.500000)}}, - {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), - SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, - {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), - SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, - {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), - SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-452097.250000), SIMDE_FLOAT32_C(453633.968750), - SIMDE_FLOAT32_C(338006.656250), SIMDE_FLOAT32_C(-338632.281250)}}, - {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), - SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, - 
{SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), - SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, - {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), - SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-590092.937500), SIMDE_FLOAT32_C(590752.500000), - SIMDE_FLOAT32_C(-167685.140625), SIMDE_FLOAT32_C(165936.531250)}}, - {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), - SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, - {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), - SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, - {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), - SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-241310.093750), SIMDE_FLOAT32_C(241164.921875), - SIMDE_FLOAT32_C(25459.757812), SIMDE_FLOAT32_C(-26348.677734)}}, - {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), - SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, - {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), - SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, - {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), - SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-126452.203125), SIMDE_FLOAT32_C(126796.023438), - SIMDE_FLOAT32_C(-140046.640625), SIMDE_FLOAT32_C(139280.625000)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( -285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(839681.750000), SIMDE_FLOAT32_C(-839156.937500), SIMDE_FLOAT32_C(782495.375000), SIMDE_FLOAT32_C(-782845.687500) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + 
{ SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(82345.062500), SIMDE_FLOAT32_C(-83299.804688), SIMDE_FLOAT32_C(105892.781250), SIMDE_FLOAT32_C(-106161.718750) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(271323.000000), SIMDE_FLOAT32_C(-272501.656250), SIMDE_FLOAT32_C(-439964.968750), SIMDE_FLOAT32_C(441537.375000) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(736097.062500), SIMDE_FLOAT32_C(-736041.000000), SIMDE_FLOAT32_C(-755055.625000), SIMDE_FLOAT32_C(754533.500000) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + { SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-452097.250000), SIMDE_FLOAT32_C(453633.968750), SIMDE_FLOAT32_C(338006.656250), SIMDE_FLOAT32_C(-338632.281250) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), 
SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-590092.937500), SIMDE_FLOAT32_C(590752.500000), SIMDE_FLOAT32_C(-167685.140625), SIMDE_FLOAT32_C(165936.531250) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-241310.093750), SIMDE_FLOAT32_C(241164.921875), SIMDE_FLOAT32_C(25459.757812), SIMDE_FLOAT32_C(-26348.677734) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-126452.203125), SIMDE_FLOAT32_C(126796.023438), SIMDE_FLOAT32_C(-140046.640625), SIMDE_FLOAT32_C(139280.625000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_rot270_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot270_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); + 
simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_rot270_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); diff --git a/test/arm/neon/cmla_rot90_lane.c b/test/arm/neon/cmla_rot90_lane.c index 8daaef44b..46d5cdbcc 100644 --- a/test/arm/neon/cmla_rot90_lane.c +++ b/test/arm/neon/cmla_rot90_lane.c @@ -1,11 +1,11 @@ #define SIMDE_TEST_ARM_NEON_INSN cmla_rot90_lane +#include "test-neon.h" #include "../../../simde/arm/neon/cmla_rot90_lane.h" - #include "../../../simde/arm/neon/dup_n.h" -#include "test-neon.h" -static int test_simde_vcmla_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -14,104 +14,69 @@ static int test_simde_vcmla_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-109.00), - SIMDE_FLOAT16_VALUE(-626.50), SIMDE_FLOAT16_VALUE(-567.00)}, - {SIMDE_FLOAT16_VALUE(-178.88), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-228.12), SIMDE_FLOAT16_VALUE(-31.19)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(-98.75), - SIMDE_FLOAT16_VALUE(350.00), SIMDE_FLOAT16_VALUE(-48.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-487.75), SIMDE_FLOAT16_VALUE(329.50), - SIMDE_FLOAT16_VALUE(711.50), 
SIMDE_FLOAT16_VALUE(-1905.00)}}, - {{SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - {SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-14.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(61.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(1593.00), SIMDE_FLOAT16_VALUE(-760.00), - SIMDE_FLOAT16_VALUE(-44736.00), SIMDE_FLOAT16_VALUE(45248.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(725.50), SIMDE_FLOAT16_VALUE(-944.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-966.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-61.00), SIMDE_FLOAT16_VALUE(185.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-59136.00), SIMDE_FLOAT16_VALUE(58496.00), - SIMDE_FLOAT16_VALUE(28944.00), SIMDE_FLOAT16_VALUE(-29152.00)}}, - {{SIMDE_FLOAT16_VALUE(89.44), SIMDE_FLOAT16_VALUE(-200.50), - SIMDE_FLOAT16_VALUE(-136.50), SIMDE_FLOAT16_VALUE(-180.50)}, - {SIMDE_FLOAT16_VALUE(-157.12), SIMDE_FLOAT16_VALUE(129.00), - SIMDE_FLOAT16_VALUE(99.06), SIMDE_FLOAT16_VALUE(-75.25)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(239.50), - SIMDE_FLOAT16_VALUE(-29.96), SIMDE_FLOAT16_VALUE(-177.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-30800.00), SIMDE_FLOAT16_VALUE(30688.00), - SIMDE_FLOAT16_VALUE(17888.00), SIMDE_FLOAT16_VALUE(-18208.00)}}, - {{SIMDE_FLOAT16_VALUE(167.25), SIMDE_FLOAT16_VALUE(-1.52), - SIMDE_FLOAT16_VALUE(-63.38), SIMDE_FLOAT16_VALUE(57.00)}, - {SIMDE_FLOAT16_VALUE(191.75), SIMDE_FLOAT16_VALUE(-197.00), - SIMDE_FLOAT16_VALUE(285.00), SIMDE_FLOAT16_VALUE(-529.00)}, - {SIMDE_FLOAT16_VALUE(-80.50), SIMDE_FLOAT16_VALUE(375.50), - SIMDE_FLOAT16_VALUE(-206.00), SIMDE_FLOAT16_VALUE(-75.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-15688.00), 
SIMDE_FLOAT16_VALUE(15856.00), - SIMDE_FLOAT16_VALUE(-42656.00), SIMDE_FLOAT16_VALUE(42656.00)}}, - {{SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(-41.00), SIMDE_FLOAT16_VALUE(285.00)}, - {SIMDE_FLOAT16_VALUE(-1.52), SIMDE_FLOAT16_VALUE(10.22), - SIMDE_FLOAT16_VALUE(-271.25), SIMDE_FLOAT16_VALUE(-257.50)}, - {SIMDE_FLOAT16_VALUE(-31.45), SIMDE_FLOAT16_VALUE(-180.50), - SIMDE_FLOAT16_VALUE(69.62), SIMDE_FLOAT16_VALUE(131.38)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(1813.00), SIMDE_FLOAT16_VALUE(-1920.00), - SIMDE_FLOAT16_VALUE(-46528.00), SIMDE_FLOAT16_VALUE(46752.00)}}, - {{SIMDE_FLOAT16_VALUE(205.75), SIMDE_FLOAT16_VALUE(-247.00), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(17.94)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(-110.75), SIMDE_FLOAT16_VALUE(18.20)}, - {SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(59.75), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(97.31)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-14712.00), SIMDE_FLOAT16_VALUE(14672.00), - SIMDE_FLOAT16_VALUE(-1574.00), SIMDE_FLOAT16_VALUE(1653.00)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00)}, - {SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(-151.12)}, - {SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44), - SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(75.88)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(13160.00), SIMDE_FLOAT16_VALUE(-14744.00), - SIMDE_FLOAT16_VALUE(-11400.00), SIMDE_FLOAT16_VALUE(12432.00)}} - + { + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) }, + { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -31.19) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), 
SIMDE_FLOAT16_VALUE( -48.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( -487.75), SIMDE_FLOAT16_VALUE( 329.50), SIMDE_FLOAT16_VALUE( 711.50), SIMDE_FLOAT16_VALUE( -1905.00) } }, + { { SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -14.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 61.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 1593.00), SIMDE_FLOAT16_VALUE( -760.00), SIMDE_FLOAT16_VALUE(-44736.00), SIMDE_FLOAT16_VALUE( 45248.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 725.50), SIMDE_FLOAT16_VALUE( -944.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -966.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -61.00), SIMDE_FLOAT16_VALUE( 185.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-59136.00), SIMDE_FLOAT16_VALUE( 58496.00), SIMDE_FLOAT16_VALUE( 28944.00), SIMDE_FLOAT16_VALUE(-29152.00) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -180.50) }, + { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 129.00), SIMDE_FLOAT16_VALUE( 99.06), SIMDE_FLOAT16_VALUE( -75.25) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 239.50), SIMDE_FLOAT16_VALUE( -29.96), SIMDE_FLOAT16_VALUE( -177.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-30800.00), SIMDE_FLOAT16_VALUE( 30688.00), SIMDE_FLOAT16_VALUE( 17888.00), SIMDE_FLOAT16_VALUE(-18208.00) } }, + { { SIMDE_FLOAT16_VALUE( 167.25), SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( -63.38), SIMDE_FLOAT16_VALUE( 57.00) }, + { SIMDE_FLOAT16_VALUE( 191.75), SIMDE_FLOAT16_VALUE( -197.00), SIMDE_FLOAT16_VALUE( 285.00), 
SIMDE_FLOAT16_VALUE( -529.00) }, + { SIMDE_FLOAT16_VALUE( -80.50), SIMDE_FLOAT16_VALUE( 375.50), SIMDE_FLOAT16_VALUE( -206.00), SIMDE_FLOAT16_VALUE( -75.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-15688.00), SIMDE_FLOAT16_VALUE( 15856.00), SIMDE_FLOAT16_VALUE(-42656.00), SIMDE_FLOAT16_VALUE( 42656.00) } }, + { { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( -41.00), SIMDE_FLOAT16_VALUE( 285.00) }, + { SIMDE_FLOAT16_VALUE( -1.52), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -271.25), SIMDE_FLOAT16_VALUE( -257.50) }, + { SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( -180.50), SIMDE_FLOAT16_VALUE( 69.62), SIMDE_FLOAT16_VALUE( 131.38) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 1813.00), SIMDE_FLOAT16_VALUE( -1920.00), SIMDE_FLOAT16_VALUE(-46528.00), SIMDE_FLOAT16_VALUE( 46752.00) } }, + { { SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -247.00), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 17.94) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( -110.75), SIMDE_FLOAT16_VALUE( 18.20) }, + { SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 97.31) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-14712.00), SIMDE_FLOAT16_VALUE( 14672.00), SIMDE_FLOAT16_VALUE( -1574.00), SIMDE_FLOAT16_VALUE( 1653.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00) }, + { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( -151.12) }, + { SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44), SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( 75.88) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 13160.00), SIMDE_FLOAT16_VALUE(-14744.00), SIMDE_FLOAT16_VALUE(-11400.00), SIMDE_FLOAT16_VALUE( 12432.00) } } }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); - + SIMDE_CONSTIFY_2_(simde_vcmla_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot90_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -122,7 +87,8 @@ static int test_simde_vcmla_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -131,65 +97,67 @@ static int test_simde_vcmla_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(83.21), SIMDE_FLOAT32_C(417.90)}, - {SIMDE_FLOAT32_C(-875.72), SIMDE_FLOAT32_C(830.54)}, - {SIMDE_FLOAT32_C(-633.53), SIMDE_FLOAT32_C(832.17)}, - INT32_C(0), - {SIMDE_FLOAT32_C(526255.250000), SIMDE_FLOAT32_C(-525754.125000)}}, - 
{{SIMDE_FLOAT32_C(-890.17), SIMDE_FLOAT32_C(649.92)}, - {SIMDE_FLOAT32_C(-111.22), SIMDE_FLOAT32_C(-830.36)}, - {SIMDE_FLOAT32_C(59.76), SIMDE_FLOAT32_C(970.61)}, - INT32_C(0), - {SIMDE_FLOAT32_C(48732.140625), SIMDE_FLOAT32_C(-48972.390625)}}, - {{SIMDE_FLOAT32_C(522.31), SIMDE_FLOAT32_C(-822.40)}, - {SIMDE_FLOAT32_C(411.34), SIMDE_FLOAT32_C(-692.35)}, - {SIMDE_FLOAT32_C(648.71), SIMDE_FLOAT32_C(385.20)}, - INT32_C(0), - {SIMDE_FLOAT32_C(449656.687500), SIMDE_FLOAT32_C(-449956.781250)}}, - {{SIMDE_FLOAT32_C(479.18), SIMDE_FLOAT32_C(-793.73)}, - {SIMDE_FLOAT32_C(-740.26), SIMDE_FLOAT32_C(245.04)}, - {SIMDE_FLOAT32_C(229.26), SIMDE_FLOAT32_C(-113.23)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-55698.687500), SIMDE_FLOAT32_C(55384.136719)}}, - {{SIMDE_FLOAT32_C(331.48), SIMDE_FLOAT32_C(-677.34)}, - {SIMDE_FLOAT32_C(97.30), SIMDE_FLOAT32_C(-52.10)}, - {SIMDE_FLOAT32_C(239.17), SIMDE_FLOAT32_C(469.68)}, - INT32_C(0), - {SIMDE_FLOAT32_C(12792.236328), SIMDE_FLOAT32_C(-13138.096680)}}, - {{SIMDE_FLOAT32_C(-543.40), SIMDE_FLOAT32_C(826.85)}, - {SIMDE_FLOAT32_C(226.38), SIMDE_FLOAT32_C(178.84)}, - {SIMDE_FLOAT32_C(181.71), SIMDE_FLOAT32_C(420.52)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-33040.417969), SIMDE_FLOAT32_C(33323.867188)}}, - {{SIMDE_FLOAT32_C(-698.84), SIMDE_FLOAT32_C(-151.15)}, - {SIMDE_FLOAT32_C(-388.27), SIMDE_FLOAT32_C(350.81)}, - {SIMDE_FLOAT32_C(890.40), SIMDE_FLOAT32_C(-664.75)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-313060.062500), SIMDE_FLOAT32_C(312210.093750)}}, - {{SIMDE_FLOAT32_C(-617.94), SIMDE_FLOAT32_C(190.84)}, - {SIMDE_FLOAT32_C(218.13), SIMDE_FLOAT32_C(-328.97)}, - {SIMDE_FLOAT32_C(-549.59), SIMDE_FLOAT32_C(-459.89)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-181416.578125), SIMDE_FLOAT32_C(180989.468750)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 83.21), SIMDE_FLOAT32_C( 417.90) }, + { SIMDE_FLOAT32_C( -875.72), SIMDE_FLOAT32_C( 830.54) }, + { SIMDE_FLOAT32_C( -633.53), SIMDE_FLOAT32_C( 832.17) }, + INT32_C( 
0), + { SIMDE_FLOAT32_C(526255.250000), SIMDE_FLOAT32_C(-525754.125000) } }, + { { SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( 649.92) }, + { SIMDE_FLOAT32_C( -111.22), SIMDE_FLOAT32_C( -830.36) }, + { SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( 970.61) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(48732.140625), SIMDE_FLOAT32_C(-48972.390625) } }, + { { SIMDE_FLOAT32_C( 522.31), SIMDE_FLOAT32_C( -822.40) }, + { SIMDE_FLOAT32_C( 411.34), SIMDE_FLOAT32_C( -692.35) }, + { SIMDE_FLOAT32_C( 648.71), SIMDE_FLOAT32_C( 385.20) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(449656.687500), SIMDE_FLOAT32_C(-449956.781250) } }, + { { SIMDE_FLOAT32_C( 479.18), SIMDE_FLOAT32_C( -793.73) }, + { SIMDE_FLOAT32_C( -740.26), SIMDE_FLOAT32_C( 245.04) }, + { SIMDE_FLOAT32_C( 229.26), SIMDE_FLOAT32_C( -113.23) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-55698.687500), SIMDE_FLOAT32_C(55384.136719) } }, + { { SIMDE_FLOAT32_C( 331.48), SIMDE_FLOAT32_C( -677.34) }, + { SIMDE_FLOAT32_C( 97.30), SIMDE_FLOAT32_C( -52.10) }, + { SIMDE_FLOAT32_C( 239.17), SIMDE_FLOAT32_C( 469.68) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(12792.236328), SIMDE_FLOAT32_C(-13138.096680) } }, + { { SIMDE_FLOAT32_C( -543.40), SIMDE_FLOAT32_C( 826.85) }, + { SIMDE_FLOAT32_C( 226.38), SIMDE_FLOAT32_C( 178.84) }, + { SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( 420.52) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-33040.417969), SIMDE_FLOAT32_C(33323.867188) } }, + { { SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( -151.15) }, + { SIMDE_FLOAT32_C( -388.27), SIMDE_FLOAT32_C( 350.81) }, + { SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( -664.75) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-313060.062500), SIMDE_FLOAT32_C(312210.093750) } }, + { { SIMDE_FLOAT32_C( -617.94), SIMDE_FLOAT32_C( 190.84) }, + { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( -328.97) }, + { SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( -459.89) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-181416.578125), SIMDE_FLOAT32_C(180989.468750) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, 0); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - + return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot90_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -200,7 +168,9 @@ static int test_simde_vcmla_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { + +static int +test_simde_vcmla_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[4]; @@ -209,118 +179,76 @@ static int test_simde_vcmla_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(816.50), SIMDE_FLOAT16_VALUE(773.50), - SIMDE_FLOAT16_VALUE(-377.75), SIMDE_FLOAT16_VALUE(544.50)}, - {SIMDE_FLOAT16_VALUE(-188.38), SIMDE_FLOAT16_VALUE(269.75), - SIMDE_FLOAT16_VALUE(115.62), SIMDE_FLOAT16_VALUE(-544.00)}, - {SIMDE_FLOAT16_VALUE(42.90), SIMDE_FLOAT16_VALUE(924.50), - SIMDE_FLOAT16_VALUE(484.00), SIMDE_FLOAT16_VALUE(-757.00), - SIMDE_FLOAT16_VALUE(730.00), SIMDE_FLOAT16_VALUE(103.12), - SIMDE_FLOAT16_VALUE(-381.25), SIMDE_FLOAT16_VALUE(898.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-10760.00), 
SIMDE_FLOAT16_VALUE(12344.00), - SIMDE_FLOAT16_VALUE(22960.00), SIMDE_FLOAT16_VALUE(-22800.00)}}, - {{SIMDE_FLOAT16_VALUE(293.00), SIMDE_FLOAT16_VALUE(-114.06), - SIMDE_FLOAT16_VALUE(-137.62), SIMDE_FLOAT16_VALUE(722.50)}, - {SIMDE_FLOAT16_VALUE(-6.33), SIMDE_FLOAT16_VALUE(161.38), - SIMDE_FLOAT16_VALUE(115.50), SIMDE_FLOAT16_VALUE(70.38)}, - {SIMDE_FLOAT16_VALUE(-899.00), SIMDE_FLOAT16_VALUE(32.51), - SIMDE_FLOAT16_VALUE(359.00), SIMDE_FLOAT16_VALUE(342.25), - SIMDE_FLOAT16_VALUE(913.00), SIMDE_FLOAT16_VALUE(740.50), - SIMDE_FLOAT16_VALUE(-436.25), SIMDE_FLOAT16_VALUE(-904.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-4952.00), SIMDE_FLOAT16_VALUE(5132.00), - SIMDE_FLOAT16_VALUE(-2424.00), SIMDE_FLOAT16_VALUE(3010.00)}}, - {{SIMDE_FLOAT16_VALUE(-335.25), SIMDE_FLOAT16_VALUE(-252.38), - SIMDE_FLOAT16_VALUE(227.25), SIMDE_FLOAT16_VALUE(-8.67)}, - {SIMDE_FLOAT16_VALUE(490.50), SIMDE_FLOAT16_VALUE(507.50), - SIMDE_FLOAT16_VALUE(-979.50), SIMDE_FLOAT16_VALUE(-718.00)}, - {SIMDE_FLOAT16_VALUE(-10.20), SIMDE_FLOAT16_VALUE(205.75), - SIMDE_FLOAT16_VALUE(-797.50), SIMDE_FLOAT16_VALUE(741.50), - SIMDE_FLOAT16_VALUE(-835.00), SIMDE_FLOAT16_VALUE(-308.00), - SIMDE_FLOAT16_VALUE(-674.50), SIMDE_FLOAT16_VALUE(-247.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(4844.00), SIMDE_FLOAT16_VALUE(-5432.00), - SIMDE_FLOAT16_VALUE(-7100.00), SIMDE_FLOAT16_VALUE(7316.00)}}, - {{SIMDE_FLOAT16_VALUE(-210.88), SIMDE_FLOAT16_VALUE(-430.75), - SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-44.50)}, - {SIMDE_FLOAT16_VALUE(142.00), SIMDE_FLOAT16_VALUE(-66.00), - SIMDE_FLOAT16_VALUE(-131.50), SIMDE_FLOAT16_VALUE(462.50)}, - {SIMDE_FLOAT16_VALUE(-361.00), SIMDE_FLOAT16_VALUE(85.62), - SIMDE_FLOAT16_VALUE(60.47), SIMDE_FLOAT16_VALUE(749.50), - SIMDE_FLOAT16_VALUE(-975.50), SIMDE_FLOAT16_VALUE(383.50), - SIMDE_FLOAT16_VALUE(500.25), SIMDE_FLOAT16_VALUE(517.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(5440.00), SIMDE_FLOAT16_VALUE(-6080.00), - SIMDE_FLOAT16_VALUE(-39584.00), 
SIMDE_FLOAT16_VALUE(39552.00)}}, - {{SIMDE_FLOAT16_VALUE(540.00), SIMDE_FLOAT16_VALUE(-927.50), - SIMDE_FLOAT16_VALUE(17.94), SIMDE_FLOAT16_VALUE(340.75)}, - {SIMDE_FLOAT16_VALUE(-397.75), SIMDE_FLOAT16_VALUE(166.12), - SIMDE_FLOAT16_VALUE(854.00), SIMDE_FLOAT16_VALUE(-110.75)}, - {SIMDE_FLOAT16_VALUE(-53.36), SIMDE_FLOAT16_VALUE(-465.00), - SIMDE_FLOAT16_VALUE(-104.00), SIMDE_FLOAT16_VALUE(-348.00), - SIMDE_FLOAT16_VALUE(-370.75), SIMDE_FLOAT16_VALUE(18.20), - SIMDE_FLOAT16_VALUE(-209.25), SIMDE_FLOAT16_VALUE(-856.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(9408.00), SIMDE_FLOAT16_VALUE(-9792.00), - SIMDE_FLOAT16_VALUE(-5892.00), SIMDE_FLOAT16_VALUE(6252.00)}}, - {{SIMDE_FLOAT16_VALUE(251.25), SIMDE_FLOAT16_VALUE(523.50), - SIMDE_FLOAT16_VALUE(-62.34), SIMDE_FLOAT16_VALUE(-412.00)}, - {SIMDE_FLOAT16_VALUE(818.50), SIMDE_FLOAT16_VALUE(-266.25), - SIMDE_FLOAT16_VALUE(558.50), SIMDE_FLOAT16_VALUE(-474.75)}, - {SIMDE_FLOAT16_VALUE(583.50), SIMDE_FLOAT16_VALUE(58.66), - SIMDE_FLOAT16_VALUE(875.50), SIMDE_FLOAT16_VALUE(382.50), - SIMDE_FLOAT16_VALUE(-535.00), SIMDE_FLOAT16_VALUE(374.50), - SIMDE_FLOAT16_VALUE(89.81), SIMDE_FLOAT16_VALUE(432.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(15872.00), SIMDE_FLOAT16_VALUE(-15096.00), - SIMDE_FLOAT16_VALUE(27792.00), SIMDE_FLOAT16_VALUE(-28256.00)}}, - {{SIMDE_FLOAT16_VALUE(83.19), SIMDE_FLOAT16_VALUE(418.00), - SIMDE_FLOAT16_VALUE(-875.50), SIMDE_FLOAT16_VALUE(830.50)}, - {SIMDE_FLOAT16_VALUE(-633.50), SIMDE_FLOAT16_VALUE(832.00), - SIMDE_FLOAT16_VALUE(-890.00), SIMDE_FLOAT16_VALUE(650.00)}, - {SIMDE_FLOAT16_VALUE(-31.19), SIMDE_FLOAT16_VALUE(-830.50), - SIMDE_FLOAT16_VALUE(59.75), SIMDE_FLOAT16_VALUE(970.50), - SIMDE_FLOAT16_VALUE(522.50), SIMDE_FLOAT16_VALUE(-822.50), - SIMDE_FLOAT16_VALUE(411.25), SIMDE_FLOAT16_VALUE(-692.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(26032.00), SIMDE_FLOAT16_VALUE(-25536.00), - SIMDE_FLOAT16_VALUE(19392.00), SIMDE_FLOAT16_VALUE(-19440.00)}}, - {{SIMDE_FLOAT16_VALUE(648.50), 
SIMDE_FLOAT16_VALUE(385.25), - SIMDE_FLOAT16_VALUE(479.25), SIMDE_FLOAT16_VALUE(-793.50)}, - {SIMDE_FLOAT16_VALUE(-740.50), SIMDE_FLOAT16_VALUE(245.00), - SIMDE_FLOAT16_VALUE(229.25), SIMDE_FLOAT16_VALUE(-113.25)}, - {SIMDE_FLOAT16_VALUE(331.50), SIMDE_FLOAT16_VALUE(-75.25), - SIMDE_FLOAT16_VALUE(97.31), SIMDE_FLOAT16_VALUE(-52.09), - SIMDE_FLOAT16_VALUE(239.12), SIMDE_FLOAT16_VALUE(469.75), - SIMDE_FLOAT16_VALUE(-543.50), SIMDE_FLOAT16_VALUE(827.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(19088.00), SIMDE_FLOAT16_VALUE(-18048.00), - SIMDE_FLOAT16_VALUE(-8044.00), SIMDE_FLOAT16_VALUE(7728.00)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( 816.50), SIMDE_FLOAT16_VALUE( 773.50), SIMDE_FLOAT16_VALUE( -377.75), SIMDE_FLOAT16_VALUE( 544.50) }, + { SIMDE_FLOAT16_VALUE( -188.38), SIMDE_FLOAT16_VALUE( 269.75), SIMDE_FLOAT16_VALUE( 115.62), SIMDE_FLOAT16_VALUE( -544.00) }, + { SIMDE_FLOAT16_VALUE( 42.90), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( 484.00), SIMDE_FLOAT16_VALUE( -757.00), + SIMDE_FLOAT16_VALUE( 730.00), SIMDE_FLOAT16_VALUE( 103.12), SIMDE_FLOAT16_VALUE( -381.25), SIMDE_FLOAT16_VALUE( 898.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-10760.00), SIMDE_FLOAT16_VALUE( 12344.00), SIMDE_FLOAT16_VALUE( 22960.00), SIMDE_FLOAT16_VALUE(-22800.00) } }, + { { SIMDE_FLOAT16_VALUE( 293.00), SIMDE_FLOAT16_VALUE( -114.06), SIMDE_FLOAT16_VALUE( -137.62), SIMDE_FLOAT16_VALUE( 722.50) }, + { SIMDE_FLOAT16_VALUE( -6.33), SIMDE_FLOAT16_VALUE( 161.38), SIMDE_FLOAT16_VALUE( 115.50), SIMDE_FLOAT16_VALUE( 70.38) }, + { SIMDE_FLOAT16_VALUE( -899.00), SIMDE_FLOAT16_VALUE( 32.51), SIMDE_FLOAT16_VALUE( 359.00), SIMDE_FLOAT16_VALUE( 342.25), + SIMDE_FLOAT16_VALUE( 913.00), SIMDE_FLOAT16_VALUE( 740.50), SIMDE_FLOAT16_VALUE( -436.25), SIMDE_FLOAT16_VALUE( -904.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -4952.00), SIMDE_FLOAT16_VALUE( 5132.00), SIMDE_FLOAT16_VALUE( -2424.00), SIMDE_FLOAT16_VALUE( 3010.00) } }, + { { 
SIMDE_FLOAT16_VALUE( -335.25), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( 227.25), SIMDE_FLOAT16_VALUE( -8.67) }, + { SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( 507.50), SIMDE_FLOAT16_VALUE( -979.50), SIMDE_FLOAT16_VALUE( -718.00) }, + { SIMDE_FLOAT16_VALUE( -10.20), SIMDE_FLOAT16_VALUE( 205.75), SIMDE_FLOAT16_VALUE( -797.50), SIMDE_FLOAT16_VALUE( 741.50), + SIMDE_FLOAT16_VALUE( -835.00), SIMDE_FLOAT16_VALUE( -308.00), SIMDE_FLOAT16_VALUE( -674.50), SIMDE_FLOAT16_VALUE( -247.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 4844.00), SIMDE_FLOAT16_VALUE( -5432.00), SIMDE_FLOAT16_VALUE( -7100.00), SIMDE_FLOAT16_VALUE( 7316.00) } }, + { { SIMDE_FLOAT16_VALUE( -210.88), SIMDE_FLOAT16_VALUE( -430.75), SIMDE_FLOAT16_VALUE( 25.50), SIMDE_FLOAT16_VALUE( -44.50) }, + { SIMDE_FLOAT16_VALUE( 142.00), SIMDE_FLOAT16_VALUE( -66.00), SIMDE_FLOAT16_VALUE( -131.50), SIMDE_FLOAT16_VALUE( 462.50) }, + { SIMDE_FLOAT16_VALUE( -361.00), SIMDE_FLOAT16_VALUE( 85.62), SIMDE_FLOAT16_VALUE( 60.47), SIMDE_FLOAT16_VALUE( 749.50), + SIMDE_FLOAT16_VALUE( -975.50), SIMDE_FLOAT16_VALUE( 383.50), SIMDE_FLOAT16_VALUE( 500.25), SIMDE_FLOAT16_VALUE( 517.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 5440.00), SIMDE_FLOAT16_VALUE( -6080.00), SIMDE_FLOAT16_VALUE(-39584.00), SIMDE_FLOAT16_VALUE( 39552.00) } }, + { { SIMDE_FLOAT16_VALUE( 540.00), SIMDE_FLOAT16_VALUE( -927.50), SIMDE_FLOAT16_VALUE( 17.94), SIMDE_FLOAT16_VALUE( 340.75) }, + { SIMDE_FLOAT16_VALUE( -397.75), SIMDE_FLOAT16_VALUE( 166.12), SIMDE_FLOAT16_VALUE( 854.00), SIMDE_FLOAT16_VALUE( -110.75) }, + { SIMDE_FLOAT16_VALUE( -53.36), SIMDE_FLOAT16_VALUE( -465.00), SIMDE_FLOAT16_VALUE( -104.00), SIMDE_FLOAT16_VALUE( -348.00), + SIMDE_FLOAT16_VALUE( -370.75), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( -209.25), SIMDE_FLOAT16_VALUE( -856.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 9408.00), SIMDE_FLOAT16_VALUE( -9792.00), SIMDE_FLOAT16_VALUE( -5892.00), SIMDE_FLOAT16_VALUE( 6252.00) } }, + { { SIMDE_FLOAT16_VALUE( 
251.25), SIMDE_FLOAT16_VALUE( 523.50), SIMDE_FLOAT16_VALUE( -62.34), SIMDE_FLOAT16_VALUE( -412.00) }, + { SIMDE_FLOAT16_VALUE( 818.50), SIMDE_FLOAT16_VALUE( -266.25), SIMDE_FLOAT16_VALUE( 558.50), SIMDE_FLOAT16_VALUE( -474.75) }, + { SIMDE_FLOAT16_VALUE( 583.50), SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 875.50), SIMDE_FLOAT16_VALUE( 382.50), + SIMDE_FLOAT16_VALUE( -535.00), SIMDE_FLOAT16_VALUE( 374.50), SIMDE_FLOAT16_VALUE( 89.81), SIMDE_FLOAT16_VALUE( 432.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 15872.00), SIMDE_FLOAT16_VALUE(-15096.00), SIMDE_FLOAT16_VALUE( 27792.00), SIMDE_FLOAT16_VALUE(-28256.00) } }, + { { SIMDE_FLOAT16_VALUE( 83.19), SIMDE_FLOAT16_VALUE( 418.00), SIMDE_FLOAT16_VALUE( -875.50), SIMDE_FLOAT16_VALUE( 830.50) }, + { SIMDE_FLOAT16_VALUE( -633.50), SIMDE_FLOAT16_VALUE( 832.00), SIMDE_FLOAT16_VALUE( -890.00), SIMDE_FLOAT16_VALUE( 650.00) }, + { SIMDE_FLOAT16_VALUE( -31.19), SIMDE_FLOAT16_VALUE( -830.50), SIMDE_FLOAT16_VALUE( 59.75), SIMDE_FLOAT16_VALUE( 970.50), + SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -822.50), SIMDE_FLOAT16_VALUE( 411.25), SIMDE_FLOAT16_VALUE( -692.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 26032.00), SIMDE_FLOAT16_VALUE(-25536.00), SIMDE_FLOAT16_VALUE( 19392.00), SIMDE_FLOAT16_VALUE(-19440.00) } }, + { { SIMDE_FLOAT16_VALUE( 648.50), SIMDE_FLOAT16_VALUE( 385.25), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -793.50) }, + { SIMDE_FLOAT16_VALUE( -740.50), SIMDE_FLOAT16_VALUE( 245.00), SIMDE_FLOAT16_VALUE( 229.25), SIMDE_FLOAT16_VALUE( -113.25) }, + { SIMDE_FLOAT16_VALUE( 331.50), SIMDE_FLOAT16_VALUE( -75.25), SIMDE_FLOAT16_VALUE( 97.31), SIMDE_FLOAT16_VALUE( -52.09), + SIMDE_FLOAT16_VALUE( 239.12), SIMDE_FLOAT16_VALUE( 469.75), SIMDE_FLOAT16_VALUE( -543.50), SIMDE_FLOAT16_VALUE( 827.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 19088.00), SIMDE_FLOAT16_VALUE(-18048.00), SIMDE_FLOAT16_VALUE( -8044.00), SIMDE_FLOAT16_VALUE( 7728.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_float16x4_t r_ = simde_vld1_f16(test_vec[i].r_); simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x4_t r_ = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t r = simde_vcmla_rot90_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -331,7 +259,8 @@ static int test_simde_vcmla_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmla_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmla_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[2]; @@ -340,78 +269,68 @@ static int test_simde_vcmla_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[2]; } test_vec[] = { - {{SIMDE_FLOAT32_C(-470.26), SIMDE_FLOAT32_C(81.87)}, - {SIMDE_FLOAT32_C(72.27), SIMDE_FLOAT32_C(-992.59)}, - {SIMDE_FLOAT32_C(-615.67), SIMDE_FLOAT32_C(620.28), - SIMDE_FLOAT32_C(-606.51), SIMDE_FLOAT32_C(-327.77)}, - INT32_C(0), - 
{SIMDE_FLOAT32_C(-611578.125000), SIMDE_FLOAT32_C(611189.750000)}}, - {{SIMDE_FLOAT32_C(-331.80), SIMDE_FLOAT32_C(-605.94)}, - {SIMDE_FLOAT32_C(-295.86), SIMDE_FLOAT32_C(-275.42)}, - {SIMDE_FLOAT32_C(-752.64), SIMDE_FLOAT32_C(21.91), - SIMDE_FLOAT32_C(826.83), SIMDE_FLOAT32_C(599.82)}, - INT32_C(1), - {SIMDE_FLOAT32_C(5702.652344), SIMDE_FLOAT32_C(-6640.392578)}}, - {{SIMDE_FLOAT32_C(-30.36), SIMDE_FLOAT32_C(631.53)}, - {SIMDE_FLOAT32_C(850.75), SIMDE_FLOAT32_C(-263.55)}, - {SIMDE_FLOAT32_C(139.96), SIMDE_FLOAT32_C(859.14), - SIMDE_FLOAT32_C(-834.47), SIMDE_FLOAT32_C(216.10)}, - INT32_C(0), - {SIMDE_FLOAT32_C(36856.097656), SIMDE_FLOAT32_C(-36254.929688)}}, - {{SIMDE_FLOAT32_C(995.86), SIMDE_FLOAT32_C(529.74)}, - {SIMDE_FLOAT32_C(79.08), SIMDE_FLOAT32_C(947.13)}, - {SIMDE_FLOAT32_C(122.02), SIMDE_FLOAT32_C(-250.00), - SIMDE_FLOAT32_C(-361.82), SIMDE_FLOAT32_C(265.24)}, - INT32_C(1), - {SIMDE_FLOAT32_C(237778.359375), SIMDE_FLOAT32_C(-236252.765625)}}, - {{SIMDE_FLOAT32_C(275.71), SIMDE_FLOAT32_C(2.71)}, - {SIMDE_FLOAT32_C(99.79), SIMDE_FLOAT32_C(-137.67)}, - {SIMDE_FLOAT32_C(-761.19), SIMDE_FLOAT32_C(813.19), - SIMDE_FLOAT32_C(-897.68), SIMDE_FLOAT32_C(653.58)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-104517.312500), SIMDE_FLOAT32_C(104795.734375)}}, - {{SIMDE_FLOAT32_C(396.02), SIMDE_FLOAT32_C(413.06)}, - {SIMDE_FLOAT32_C(514.09), SIMDE_FLOAT32_C(-977.67)}, - {SIMDE_FLOAT32_C(-671.79), SIMDE_FLOAT32_C(-92.13), - SIMDE_FLOAT32_C(-441.32), SIMDE_FLOAT32_C(-374.27)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-89676.710938), SIMDE_FLOAT32_C(90485.789062)}}, - {{SIMDE_FLOAT32_C(-151.97), SIMDE_FLOAT32_C(-79.55)}, - {SIMDE_FLOAT32_C(-214.62), SIMDE_FLOAT32_C(-614.75)}, - {SIMDE_FLOAT32_C(678.79), SIMDE_FLOAT32_C(783.83), - SIMDE_FLOAT32_C(493.05), SIMDE_FLOAT32_C(-896.00)}, - INT32_C(0), - {SIMDE_FLOAT32_C(417134.156250), SIMDE_FLOAT32_C(-417365.687500)}}, - {{SIMDE_FLOAT32_C(104.13), SIMDE_FLOAT32_C(278.54)}, - {SIMDE_FLOAT32_C(171.54), SIMDE_FLOAT32_C(-682.63)}, - 
{SIMDE_FLOAT32_C(217.09), SIMDE_FLOAT32_C(49.35), - SIMDE_FLOAT32_C(256.50), SIMDE_FLOAT32_C(-92.04)}, - INT32_C(1), - {SIMDE_FLOAT32_C(33791.917969), SIMDE_FLOAT32_C(-33409.250000)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( -470.26), SIMDE_FLOAT32_C( 81.87) }, + { SIMDE_FLOAT32_C( 72.27), SIMDE_FLOAT32_C( -992.59) }, + { SIMDE_FLOAT32_C( -615.67), SIMDE_FLOAT32_C( 620.28), SIMDE_FLOAT32_C( -606.51), SIMDE_FLOAT32_C( -327.77) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-611578.125000), SIMDE_FLOAT32_C(611189.750000) } }, + { { SIMDE_FLOAT32_C( -331.80), SIMDE_FLOAT32_C( -605.94) }, + { SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -275.42) }, + { SIMDE_FLOAT32_C( -752.64), SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( 826.83), SIMDE_FLOAT32_C( 599.82) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(5702.652344), SIMDE_FLOAT32_C(-6640.392578) } }, + { { SIMDE_FLOAT32_C( -30.36), SIMDE_FLOAT32_C( 631.53) }, + { SIMDE_FLOAT32_C( 850.75), SIMDE_FLOAT32_C( -263.55) }, + { SIMDE_FLOAT32_C( 139.96), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -834.47), SIMDE_FLOAT32_C( 216.10) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(36856.097656), SIMDE_FLOAT32_C(-36254.929688) } }, + { { SIMDE_FLOAT32_C( 995.86), SIMDE_FLOAT32_C( 529.74) }, + { SIMDE_FLOAT32_C( 79.08), SIMDE_FLOAT32_C( 947.13) }, + { SIMDE_FLOAT32_C( 122.02), SIMDE_FLOAT32_C( -250.00), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 265.24) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(237778.359375), SIMDE_FLOAT32_C(-236252.765625) } }, + { { SIMDE_FLOAT32_C( 275.71), SIMDE_FLOAT32_C( 2.71) }, + { SIMDE_FLOAT32_C( 99.79), SIMDE_FLOAT32_C( -137.67) }, + { SIMDE_FLOAT32_C( -761.19), SIMDE_FLOAT32_C( 813.19), SIMDE_FLOAT32_C( -897.68), SIMDE_FLOAT32_C( 653.58) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-104517.312500), SIMDE_FLOAT32_C(104795.734375) } }, + { { SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 413.06) }, + { SIMDE_FLOAT32_C( 514.09), SIMDE_FLOAT32_C( -977.67) }, + { SIMDE_FLOAT32_C( -671.79), 
SIMDE_FLOAT32_C( -92.13), SIMDE_FLOAT32_C( -441.32), SIMDE_FLOAT32_C( -374.27) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-89676.710938), SIMDE_FLOAT32_C(90485.789062) } }, + { { SIMDE_FLOAT32_C( -151.97), SIMDE_FLOAT32_C( -79.55) }, + { SIMDE_FLOAT32_C( -214.62), SIMDE_FLOAT32_C( -614.75) }, + { SIMDE_FLOAT32_C( 678.79), SIMDE_FLOAT32_C( 783.83), SIMDE_FLOAT32_C( 493.05), SIMDE_FLOAT32_C( -896.00) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(417134.156250), SIMDE_FLOAT32_C(-417365.687500) } }, + { { SIMDE_FLOAT32_C( 104.13), SIMDE_FLOAT32_C( 278.54) }, + { SIMDE_FLOAT32_C( 171.54), SIMDE_FLOAT32_C( -682.63) }, + { SIMDE_FLOAT32_C( 217.09), SIMDE_FLOAT32_C( 49.35), SIMDE_FLOAT32_C( 256.50), SIMDE_FLOAT32_C( -92.04) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(33791.917969), SIMDE_FLOAT32_C(-33409.250000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r_ = simde_vld1_f32(test_vec[i].r_); simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x2_t r; - SIMDE_CONSTIFY_2_( - simde_vcmla_rot90_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmla_rot90_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t r_ = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vcmla_rot90_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x2(2, r_, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -422,7 +341,8 @@ static int test_simde_vcmla_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot90_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -431,154 +351,93 @@ static int test_simde_vcmlaq_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - - {{SIMDE_FLOAT16_VALUE(226.38), SIMDE_FLOAT16_VALUE(178.88), - SIMDE_FLOAT16_VALUE(181.75), SIMDE_FLOAT16_VALUE(420.50), - SIMDE_FLOAT16_VALUE(-699.00), SIMDE_FLOAT16_VALUE(-151.12), - SIMDE_FLOAT16_VALUE(-388.25), SIMDE_FLOAT16_VALUE(350.75)}, - {SIMDE_FLOAT16_VALUE(890.50), SIMDE_FLOAT16_VALUE(-665.00), - SIMDE_FLOAT16_VALUE(-618.00), SIMDE_FLOAT16_VALUE(190.88), - SIMDE_FLOAT16_VALUE(218.12), SIMDE_FLOAT16_VALUE(-329.00), - SIMDE_FLOAT16_VALUE(-549.50), SIMDE_FLOAT16_VALUE(-460.00)}, - {SIMDE_FLOAT16_VALUE(-49.28), SIMDE_FLOAT16_VALUE(-695.50), - SIMDE_FLOAT16_VALUE(121.50), SIMDE_FLOAT16_VALUE(-78.44)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-32544.00), SIMDE_FLOAT16_VALUE(32960.00), - SIMDE_FLOAT16_VALUE(9592.00), SIMDE_FLOAT16_VALUE(-8984.00), - SIMDE_FLOAT16_VALUE(-16912.00), SIMDE_FLOAT16_VALUE(16064.00), - SIMDE_FLOAT16_VALUE(-23056.00), SIMDE_FLOAT16_VALUE(23024.00)}}, - {{SIMDE_FLOAT16_VALUE(-869.50), SIMDE_FLOAT16_VALUE(-710.00), - SIMDE_FLOAT16_VALUE(457.25), SIMDE_FLOAT16_VALUE(579.00), - SIMDE_FLOAT16_VALUE(-786.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-570.50), SIMDE_FLOAT16_VALUE(369.25)}, - {SIMDE_FLOAT16_VALUE(48.06), SIMDE_FLOAT16_VALUE(-40.00), - SIMDE_FLOAT16_VALUE(-52.00), SIMDE_FLOAT16_VALUE(75.88), - SIMDE_FLOAT16_VALUE(352.25), SIMDE_FLOAT16_VALUE(324.25), - SIMDE_FLOAT16_VALUE(485.50), SIMDE_FLOAT16_VALUE(43.50)}, - {SIMDE_FLOAT16_VALUE(-976.00), 
SIMDE_FLOAT16_VALUE(89.44), - SIMDE_FLOAT16_VALUE(-100.62), SIMDE_FLOAT16_VALUE(-463.75)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(2708.00), SIMDE_FLOAT16_VALUE(-4288.00), - SIMDE_FLOAT16_VALUE(-6328.00), SIMDE_FLOAT16_VALUE(7364.00), - SIMDE_FLOAT16_VALUE(-29792.00), SIMDE_FLOAT16_VALUE(28032.00), - SIMDE_FLOAT16_VALUE(-4460.00), SIMDE_FLOAT16_VALUE(4260.00)}}, - {{SIMDE_FLOAT16_VALUE(-973.00), SIMDE_FLOAT16_VALUE(-159.62), - SIMDE_FLOAT16_VALUE(-377.00), SIMDE_FLOAT16_VALUE(-853.00), - SIMDE_FLOAT16_VALUE(831.00), SIMDE_FLOAT16_VALUE(661.00), - SIMDE_FLOAT16_VALUE(-717.00), SIMDE_FLOAT16_VALUE(891.50)}, - {SIMDE_FLOAT16_VALUE(535.50), SIMDE_FLOAT16_VALUE(621.50), - SIMDE_FLOAT16_VALUE(578.50), SIMDE_FLOAT16_VALUE(430.50), - SIMDE_FLOAT16_VALUE(-574.50), SIMDE_FLOAT16_VALUE(-319.75), - SIMDE_FLOAT16_VALUE(-735.00), SIMDE_FLOAT16_VALUE(248.50)}, - {SIMDE_FLOAT16_VALUE(32.59), SIMDE_FLOAT16_VALUE(192.38), - SIMDE_FLOAT16_VALUE(679.00), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE(20096.00), - SIMDE_FLOAT16_VALUE(-14408.00), SIMDE_FLOAT16_VALUE(13176.00), - SIMDE_FLOAT16_VALUE(11256.00), SIMDE_FLOAT16_VALUE(-9760.00), - SIMDE_FLOAT16_VALUE(-8816.00), SIMDE_FLOAT16_VALUE(8992.00)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(-70.81), - SIMDE_FLOAT16_VALUE(-596.50), SIMDE_FLOAT16_VALUE(810.50), - SIMDE_FLOAT16_VALUE(863.50), SIMDE_FLOAT16_VALUE(828.50), - SIMDE_FLOAT16_VALUE(-563.50), SIMDE_FLOAT16_VALUE(-576.50)}, - {SIMDE_FLOAT16_VALUE(-703.50), SIMDE_FLOAT16_VALUE(384.00), - SIMDE_FLOAT16_VALUE(-772.50), SIMDE_FLOAT16_VALUE(457.50), - SIMDE_FLOAT16_VALUE(296.00), SIMDE_FLOAT16_VALUE(653.00), - SIMDE_FLOAT16_VALUE(-121.00), SIMDE_FLOAT16_VALUE(945.50)}, - {SIMDE_FLOAT16_VALUE(-280.75), SIMDE_FLOAT16_VALUE(-31.45), - SIMDE_FLOAT16_VALUE(688.50), SIMDE_FLOAT16_VALUE(192.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(11456.00), SIMDE_FLOAT16_VALUE(-12152.00), - SIMDE_FLOAT16_VALUE(13792.00), 
SIMDE_FLOAT16_VALUE(-13576.00), - SIMDE_FLOAT16_VALUE(21408.00), SIMDE_FLOAT16_VALUE(-19712.00), - SIMDE_FLOAT16_VALUE(29168.00), SIMDE_FLOAT16_VALUE(-30320.00)}}, - {{SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-323.75), - SIMDE_FLOAT16_VALUE(-888.00), SIMDE_FLOAT16_VALUE(-283.75), - SIMDE_FLOAT16_VALUE(-117.75), SIMDE_FLOAT16_VALUE(-841.50), - SIMDE_FLOAT16_VALUE(665.00), SIMDE_FLOAT16_VALUE(-987.00)}, - {SIMDE_FLOAT16_VALUE(-643.00), SIMDE_FLOAT16_VALUE(-152.12), - SIMDE_FLOAT16_VALUE(964.00), SIMDE_FLOAT16_VALUE(920.00), - SIMDE_FLOAT16_VALUE(630.50), SIMDE_FLOAT16_VALUE(-669.50), - SIMDE_FLOAT16_VALUE(671.00), SIMDE_FLOAT16_VALUE(257.00)}, - {SIMDE_FLOAT16_VALUE(10.22), SIMDE_FLOAT16_VALUE(-857.50), - SIMDE_FLOAT16_VALUE(334.75), SIMDE_FLOAT16_VALUE(-617.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(1032.00), SIMDE_FLOAT16_VALUE(-1878.00), - SIMDE_FLOAT16_VALUE(-10288.00), SIMDE_FLOAT16_VALUE(9120.00), - SIMDE_FLOAT16_VALUE(6724.00), SIMDE_FLOAT16_VALUE(-7684.00), - SIMDE_FLOAT16_VALUE(-1961.00), SIMDE_FLOAT16_VALUE(1639.00)}}, - {{SIMDE_FLOAT16_VALUE(-439.50), SIMDE_FLOAT16_VALUE(245.12), - SIMDE_FLOAT16_VALUE(111.06), SIMDE_FLOAT16_VALUE(520.50), - SIMDE_FLOAT16_VALUE(85.50), SIMDE_FLOAT16_VALUE(250.25), - SIMDE_FLOAT16_VALUE(-680.00), SIMDE_FLOAT16_VALUE(-750.00)}, - {SIMDE_FLOAT16_VALUE(-138.25), SIMDE_FLOAT16_VALUE(-14.62), - SIMDE_FLOAT16_VALUE(-921.50), SIMDE_FLOAT16_VALUE(225.88), - SIMDE_FLOAT16_VALUE(242.88), SIMDE_FLOAT16_VALUE(869.50), - SIMDE_FLOAT16_VALUE(298.00), SIMDE_FLOAT16_VALUE(105.69)}, - {SIMDE_FLOAT16_VALUE(-722.50), SIMDE_FLOAT16_VALUE(-8.75), - SIMDE_FLOAT16_VALUE(-245.75), SIMDE_FLOAT16_VALUE(915.50)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-567.50), SIMDE_FLOAT16_VALUE(373.00), - SIMDE_FLOAT16_VALUE(2088.00), SIMDE_FLOAT16_VALUE(-1456.00), - SIMDE_FLOAT16_VALUE(7692.00), SIMDE_FLOAT16_VALUE(-7356.00), - SIMDE_FLOAT16_VALUE(244.75), SIMDE_FLOAT16_VALUE(-1675.00)}}, - {{SIMDE_FLOAT16_VALUE(54.19), SIMDE_FLOAT16_VALUE(-928.00), - 
SIMDE_FLOAT16_VALUE(362.50), SIMDE_FLOAT16_VALUE(-936.50), - SIMDE_FLOAT16_VALUE(185.88), SIMDE_FLOAT16_VALUE(-244.38), - SIMDE_FLOAT16_VALUE(924.50), SIMDE_FLOAT16_VALUE(-644.00)}, - {SIMDE_FLOAT16_VALUE(-517.00), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-751.50), SIMDE_FLOAT16_VALUE(-974.00), - SIMDE_FLOAT16_VALUE(-144.38), SIMDE_FLOAT16_VALUE(338.25), - SIMDE_FLOAT16_VALUE(705.00), SIMDE_FLOAT16_VALUE(116.88)}, - {SIMDE_FLOAT16_VALUE(49.38), SIMDE_FLOAT16_VALUE(-363.00), - SIMDE_FLOAT16_VALUE(-476.25), SIMDE_FLOAT16_VALUE(106.69)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(30416.00), SIMDE_FLOAT16_VALUE(-31296.00), - SIMDE_FLOAT16_VALUE(48448.00), SIMDE_FLOAT16_VALUE(-49024.00), - SIMDE_FLOAT16_VALUE(-16512.00), SIMDE_FLOAT16_VALUE(16464.00), - SIMDE_FLOAT16_VALUE(-4848.00), SIMDE_FLOAT16_VALUE(5128.00)}}, - {{SIMDE_FLOAT16_VALUE(-726.00), SIMDE_FLOAT16_VALUE(-353.75), - SIMDE_FLOAT16_VALUE(268.50), SIMDE_FLOAT16_VALUE(729.00), - SIMDE_FLOAT16_VALUE(-470.25), SIMDE_FLOAT16_VALUE(81.88), - SIMDE_FLOAT16_VALUE(72.25), SIMDE_FLOAT16_VALUE(-992.50)}, - {SIMDE_FLOAT16_VALUE(-615.50), SIMDE_FLOAT16_VALUE(620.50), - SIMDE_FLOAT16_VALUE(-606.50), SIMDE_FLOAT16_VALUE(-327.75), - SIMDE_FLOAT16_VALUE(-331.75), SIMDE_FLOAT16_VALUE(-606.00), - SIMDE_FLOAT16_VALUE(-295.75), SIMDE_FLOAT16_VALUE(-275.50)}, - {SIMDE_FLOAT16_VALUE(-752.50), SIMDE_FLOAT16_VALUE(21.91), - SIMDE_FLOAT16_VALUE(827.00), SIMDE_FLOAT16_VALUE(600.00)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-14320.00), SIMDE_FLOAT16_VALUE(13240.00), - SIMDE_FLOAT16_VALUE(7448.00), SIMDE_FLOAT16_VALUE(-6452.00), - SIMDE_FLOAT16_VALUE(12808.00), SIMDE_FLOAT16_VALUE(-13192.00), - SIMDE_FLOAT16_VALUE(6108.00), SIMDE_FLOAT16_VALUE(-7028.00)}} - + { { SIMDE_FLOAT16_VALUE( 226.38), SIMDE_FLOAT16_VALUE( 178.88), SIMDE_FLOAT16_VALUE( 181.75), SIMDE_FLOAT16_VALUE( 420.50), + SIMDE_FLOAT16_VALUE( -699.00), SIMDE_FLOAT16_VALUE( -151.12), SIMDE_FLOAT16_VALUE( -388.25), SIMDE_FLOAT16_VALUE( 350.75) }, + { SIMDE_FLOAT16_VALUE( 
890.50), SIMDE_FLOAT16_VALUE( -665.00), SIMDE_FLOAT16_VALUE( -618.00), SIMDE_FLOAT16_VALUE( 190.88), + SIMDE_FLOAT16_VALUE( 218.12), SIMDE_FLOAT16_VALUE( -329.00), SIMDE_FLOAT16_VALUE( -549.50), SIMDE_FLOAT16_VALUE( -460.00) }, + { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -695.50), SIMDE_FLOAT16_VALUE( 121.50), SIMDE_FLOAT16_VALUE( -78.44) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-32544.00), SIMDE_FLOAT16_VALUE( 32960.00), SIMDE_FLOAT16_VALUE( 9592.00), SIMDE_FLOAT16_VALUE( -8984.00), + SIMDE_FLOAT16_VALUE(-16912.00), SIMDE_FLOAT16_VALUE( 16064.00), SIMDE_FLOAT16_VALUE(-23056.00), SIMDE_FLOAT16_VALUE( 23024.00) } }, + { { SIMDE_FLOAT16_VALUE( -869.50), SIMDE_FLOAT16_VALUE( -710.00), SIMDE_FLOAT16_VALUE( 457.25), SIMDE_FLOAT16_VALUE( 579.00), + SIMDE_FLOAT16_VALUE( -786.50), SIMDE_FLOAT16_VALUE( -974.00), SIMDE_FLOAT16_VALUE( -570.50), SIMDE_FLOAT16_VALUE( 369.25) }, + { SIMDE_FLOAT16_VALUE( 48.06), SIMDE_FLOAT16_VALUE( -40.00), SIMDE_FLOAT16_VALUE( -52.00), SIMDE_FLOAT16_VALUE( 75.88), + SIMDE_FLOAT16_VALUE( 352.25), SIMDE_FLOAT16_VALUE( 324.25), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 43.50) }, + { SIMDE_FLOAT16_VALUE( -976.00), SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -100.62), SIMDE_FLOAT16_VALUE( -463.75) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 2708.00), SIMDE_FLOAT16_VALUE( -4288.00), SIMDE_FLOAT16_VALUE( -6328.00), SIMDE_FLOAT16_VALUE( 7364.00), + SIMDE_FLOAT16_VALUE(-29792.00), SIMDE_FLOAT16_VALUE( 28032.00), SIMDE_FLOAT16_VALUE( -4460.00), SIMDE_FLOAT16_VALUE( 4260.00) } }, + { { SIMDE_FLOAT16_VALUE( -973.00), SIMDE_FLOAT16_VALUE( -159.62), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -853.00), + SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 661.00), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( 891.50) }, + { SIMDE_FLOAT16_VALUE( 535.50), SIMDE_FLOAT16_VALUE( 621.50), SIMDE_FLOAT16_VALUE( 578.50), SIMDE_FLOAT16_VALUE( 430.50), + SIMDE_FLOAT16_VALUE( -574.50), SIMDE_FLOAT16_VALUE( -319.75), 
SIMDE_FLOAT16_VALUE( -735.00), SIMDE_FLOAT16_VALUE( 248.50) }, + { SIMDE_FLOAT16_VALUE( 32.59), SIMDE_FLOAT16_VALUE( 192.38), SIMDE_FLOAT16_VALUE( 679.00), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE( 20096.00), SIMDE_FLOAT16_VALUE(-14408.00), SIMDE_FLOAT16_VALUE( 13176.00), + SIMDE_FLOAT16_VALUE( 11256.00), SIMDE_FLOAT16_VALUE( -9760.00), SIMDE_FLOAT16_VALUE( -8816.00), SIMDE_FLOAT16_VALUE( 8992.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( -70.81), SIMDE_FLOAT16_VALUE( -596.50), SIMDE_FLOAT16_VALUE( 810.50), + SIMDE_FLOAT16_VALUE( 863.50), SIMDE_FLOAT16_VALUE( 828.50), SIMDE_FLOAT16_VALUE( -563.50), SIMDE_FLOAT16_VALUE( -576.50) }, + { SIMDE_FLOAT16_VALUE( -703.50), SIMDE_FLOAT16_VALUE( 384.00), SIMDE_FLOAT16_VALUE( -772.50), SIMDE_FLOAT16_VALUE( 457.50), + SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( 653.00), SIMDE_FLOAT16_VALUE( -121.00), SIMDE_FLOAT16_VALUE( 945.50) }, + { SIMDE_FLOAT16_VALUE( -280.75), SIMDE_FLOAT16_VALUE( -31.45), SIMDE_FLOAT16_VALUE( 688.50), SIMDE_FLOAT16_VALUE( 192.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 11456.00), SIMDE_FLOAT16_VALUE(-12152.00), SIMDE_FLOAT16_VALUE( 13792.00), SIMDE_FLOAT16_VALUE(-13576.00), + SIMDE_FLOAT16_VALUE( 21408.00), SIMDE_FLOAT16_VALUE(-19712.00), SIMDE_FLOAT16_VALUE( 29168.00), SIMDE_FLOAT16_VALUE(-30320.00) } }, + { { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -323.75), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -283.75), + SIMDE_FLOAT16_VALUE( -117.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( 665.00), SIMDE_FLOAT16_VALUE( -987.00) }, + { SIMDE_FLOAT16_VALUE( -643.00), SIMDE_FLOAT16_VALUE( -152.12), SIMDE_FLOAT16_VALUE( 964.00), SIMDE_FLOAT16_VALUE( 920.00), + SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( -669.50), SIMDE_FLOAT16_VALUE( 671.00), SIMDE_FLOAT16_VALUE( 257.00) }, + { SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( -857.50), SIMDE_FLOAT16_VALUE( 334.75), 
SIMDE_FLOAT16_VALUE( -617.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 1032.00), SIMDE_FLOAT16_VALUE( -1878.00), SIMDE_FLOAT16_VALUE(-10288.00), SIMDE_FLOAT16_VALUE( 9120.00), + SIMDE_FLOAT16_VALUE( 6724.00), SIMDE_FLOAT16_VALUE( -7684.00), SIMDE_FLOAT16_VALUE( -1961.00), SIMDE_FLOAT16_VALUE( 1639.00) } }, + { { SIMDE_FLOAT16_VALUE( -439.50), SIMDE_FLOAT16_VALUE( 245.12), SIMDE_FLOAT16_VALUE( 111.06), SIMDE_FLOAT16_VALUE( 520.50), + SIMDE_FLOAT16_VALUE( 85.50), SIMDE_FLOAT16_VALUE( 250.25), SIMDE_FLOAT16_VALUE( -680.00), SIMDE_FLOAT16_VALUE( -750.00) }, + { SIMDE_FLOAT16_VALUE( -138.25), SIMDE_FLOAT16_VALUE( -14.62), SIMDE_FLOAT16_VALUE( -921.50), SIMDE_FLOAT16_VALUE( 225.88), + SIMDE_FLOAT16_VALUE( 242.88), SIMDE_FLOAT16_VALUE( 869.50), SIMDE_FLOAT16_VALUE( 298.00), SIMDE_FLOAT16_VALUE( 105.69) }, + { SIMDE_FLOAT16_VALUE( -722.50), SIMDE_FLOAT16_VALUE( -8.75), SIMDE_FLOAT16_VALUE( -245.75), SIMDE_FLOAT16_VALUE( 915.50) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -567.50), SIMDE_FLOAT16_VALUE( 373.00), SIMDE_FLOAT16_VALUE( 2088.00), SIMDE_FLOAT16_VALUE( -1456.00), + SIMDE_FLOAT16_VALUE( 7692.00), SIMDE_FLOAT16_VALUE( -7356.00), SIMDE_FLOAT16_VALUE( 244.75), SIMDE_FLOAT16_VALUE( -1675.00) } }, + { { SIMDE_FLOAT16_VALUE( 54.19), SIMDE_FLOAT16_VALUE( -928.00), SIMDE_FLOAT16_VALUE( 362.50), SIMDE_FLOAT16_VALUE( -936.50), + SIMDE_FLOAT16_VALUE( 185.88), SIMDE_FLOAT16_VALUE( -244.38), SIMDE_FLOAT16_VALUE( 924.50), SIMDE_FLOAT16_VALUE( -644.00) }, + { SIMDE_FLOAT16_VALUE( -517.00), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -751.50), SIMDE_FLOAT16_VALUE( -974.00), + SIMDE_FLOAT16_VALUE( -144.38), SIMDE_FLOAT16_VALUE( 338.25), SIMDE_FLOAT16_VALUE( 705.00), SIMDE_FLOAT16_VALUE( 116.88) }, + { SIMDE_FLOAT16_VALUE( 49.38), SIMDE_FLOAT16_VALUE( -363.00), SIMDE_FLOAT16_VALUE( -476.25), SIMDE_FLOAT16_VALUE( 106.69) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE( 30416.00), SIMDE_FLOAT16_VALUE(-31296.00), SIMDE_FLOAT16_VALUE( 48448.00), SIMDE_FLOAT16_VALUE(-49024.00), + 
SIMDE_FLOAT16_VALUE(-16512.00), SIMDE_FLOAT16_VALUE( 16464.00), SIMDE_FLOAT16_VALUE( -4848.00), SIMDE_FLOAT16_VALUE( 5128.00) } }, + { { SIMDE_FLOAT16_VALUE( -726.00), SIMDE_FLOAT16_VALUE( -353.75), SIMDE_FLOAT16_VALUE( 268.50), SIMDE_FLOAT16_VALUE( 729.00), + SIMDE_FLOAT16_VALUE( -470.25), SIMDE_FLOAT16_VALUE( 81.88), SIMDE_FLOAT16_VALUE( 72.25), SIMDE_FLOAT16_VALUE( -992.50) }, + { SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( 620.50), SIMDE_FLOAT16_VALUE( -606.50), SIMDE_FLOAT16_VALUE( -327.75), + SIMDE_FLOAT16_VALUE( -331.75), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -295.75), SIMDE_FLOAT16_VALUE( -275.50) }, + { SIMDE_FLOAT16_VALUE( -752.50), SIMDE_FLOAT16_VALUE( 21.91), SIMDE_FLOAT16_VALUE( 827.00), SIMDE_FLOAT16_VALUE( 600.00) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE(-14320.00), SIMDE_FLOAT16_VALUE( 13240.00), SIMDE_FLOAT16_VALUE( 7448.00), SIMDE_FLOAT16_VALUE( -6452.00), + SIMDE_FLOAT16_VALUE( 12808.00), SIMDE_FLOAT16_VALUE(-13192.00), SIMDE_FLOAT16_VALUE( 6108.00), SIMDE_FLOAT16_VALUE( -7028.00) } } }; - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_2_( - simde_vcmlaq_rot90_lane_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 
1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_lane_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -589,7 +448,8 @@ static int test_simde_vcmlaq_rot90_lane_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot90_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -598,90 +458,66 @@ static int test_simde_vcmlaq_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(301.65), SIMDE_FLOAT32_C(490.71), - SIMDE_FLOAT32_C(-744.66), SIMDE_FLOAT32_C(-738.17)}, - {SIMDE_FLOAT32_C(-301.20), SIMDE_FLOAT32_C(-904.34), - SIMDE_FLOAT32_C(771.98), SIMDE_FLOAT32_C(233.71)}, - {SIMDE_FLOAT32_C(830.18), SIMDE_FLOAT32_C(979.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(751066.625000), SIMDE_FLOAT32_C(-750274.312500), - SIMDE_FLOAT32_C(-194766.031250), SIMDE_FLOAT32_C(193283.203125)}}, - {{SIMDE_FLOAT32_C(-38.01), SIMDE_FLOAT32_C(151.61), - SIMDE_FLOAT32_C(201.45), SIMDE_FLOAT32_C(-747.32)}, - {SIMDE_FLOAT32_C(-331.17), SIMDE_FLOAT32_C(7.62), - SIMDE_FLOAT32_C(-454.77), SIMDE_FLOAT32_C(-381.81)}, - {SIMDE_FLOAT32_C(236.39), SIMDE_FLOAT32_C(-158.94)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-1839.301758), SIMDE_FLOAT32_C(1952.901733), - SIMDE_FLOAT32_C(90457.515625), SIMDE_FLOAT32_C(-91003.382812)}}, - {{SIMDE_FLOAT32_C(605.85), SIMDE_FLOAT32_C(244.27), - SIMDE_FLOAT32_C(-426.53), SIMDE_FLOAT32_C(-969.18)}, - {SIMDE_FLOAT32_C(322.13), SIMDE_FLOAT32_C(863.77), - 
SIMDE_FLOAT32_C(-685.35), SIMDE_FLOAT32_C(-710.70)}, - {SIMDE_FLOAT32_C(-791.12), SIMDE_FLOAT32_C(373.53)}, - INT32_C(0), - {SIMDE_FLOAT32_C(683951.562500), SIMDE_FLOAT32_C(-683101.437500), - SIMDE_FLOAT32_C(-562675.500000), SIMDE_FLOAT32_C(561279.812500)}}, - {{SIMDE_FLOAT32_C(-606.46), SIMDE_FLOAT32_C(-507.55), - SIMDE_FLOAT32_C(-68.24), SIMDE_FLOAT32_C(-823.05)}, - {SIMDE_FLOAT32_C(-359.95), SIMDE_FLOAT32_C(611.92), - SIMDE_FLOAT32_C(514.14), SIMDE_FLOAT32_C(-660.86)}, - {SIMDE_FLOAT32_C(181.81), SIMDE_FLOAT32_C(115.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-111859.632812), SIMDE_FLOAT32_C(110745.617188), - SIMDE_FLOAT32_C(120082.710938), SIMDE_FLOAT32_C(-120974.000000)}}, - {{SIMDE_FLOAT32_C(951.84), SIMDE_FLOAT32_C(-864.96), - SIMDE_FLOAT32_C(970.59), SIMDE_FLOAT32_C(769.97)}, - {SIMDE_FLOAT32_C(609.64), SIMDE_FLOAT32_C(-580.87), - SIMDE_FLOAT32_C(358.59), SIMDE_FLOAT32_C(350.56)}, - {SIMDE_FLOAT32_C(26.31), SIMDE_FLOAT32_C(163.74)}, - INT32_C(0), - {SIMDE_FLOAT32_C(16234.529297), SIMDE_FLOAT32_C(-16147.649414), - SIMDE_FLOAT32_C(-8252.643555), SIMDE_FLOAT32_C(9993.203125)}}, - {{SIMDE_FLOAT32_C(-636.68), SIMDE_FLOAT32_C(554.68), - SIMDE_FLOAT32_C(-385.40), SIMDE_FLOAT32_C(-565.95)}, - {SIMDE_FLOAT32_C(129.69), SIMDE_FLOAT32_C(961.79), - SIMDE_FLOAT32_C(-333.22), SIMDE_FLOAT32_C(69.65)}, - {SIMDE_FLOAT32_C(726.96), SIMDE_FLOAT32_C(131.41)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-699819.562500), SIMDE_FLOAT32_C(699737.562500), - SIMDE_FLOAT32_C(-51018.167969), SIMDE_FLOAT32_C(50066.816406)}}, - {{SIMDE_FLOAT32_C(-211.91), SIMDE_FLOAT32_C(829.24), - SIMDE_FLOAT32_C(-475.13), SIMDE_FLOAT32_C(562.33)}, - {SIMDE_FLOAT32_C(515.59), SIMDE_FLOAT32_C(-290.69), - SIMDE_FLOAT32_C(-816.53), SIMDE_FLOAT32_C(17.39)}, - {SIMDE_FLOAT32_C(268.14), SIMDE_FLOAT32_C(729.88)}, - INT32_C(0), - {SIMDE_FLOAT32_C(77733.710938), SIMDE_FLOAT32_C(-77116.382812), - SIMDE_FLOAT32_C(-5138.084473), SIMDE_FLOAT32_C(5225.284668)}}, - {{SIMDE_FLOAT32_C(-894.99), SIMDE_FLOAT32_C(516.42), - 
SIMDE_FLOAT32_C(-169.55), SIMDE_FLOAT32_C(696.41)}, - {SIMDE_FLOAT32_C(-388.51), SIMDE_FLOAT32_C(987.71), - SIMDE_FLOAT32_C(-91.49), SIMDE_FLOAT32_C(-970.85)}, - {SIMDE_FLOAT32_C(357.28), SIMDE_FLOAT32_C(-28.01)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-353784.031250), SIMDE_FLOAT32_C(353405.468750), - SIMDE_FLOAT32_C(346695.718750), SIMDE_FLOAT32_C(-346168.875000)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 301.65), SIMDE_FLOAT32_C( 490.71), SIMDE_FLOAT32_C( -744.66), SIMDE_FLOAT32_C( -738.17) }, + { SIMDE_FLOAT32_C( -301.20), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( 771.98), SIMDE_FLOAT32_C( 233.71) }, + { SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 979.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(751066.625000), SIMDE_FLOAT32_C(-750274.312500), SIMDE_FLOAT32_C(-194766.031250), SIMDE_FLOAT32_C(193283.203125) } }, + { { SIMDE_FLOAT32_C( -38.01), SIMDE_FLOAT32_C( 151.61), SIMDE_FLOAT32_C( 201.45), SIMDE_FLOAT32_C( -747.32) }, + { SIMDE_FLOAT32_C( -331.17), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( -454.77), SIMDE_FLOAT32_C( -381.81) }, + { SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -158.94) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-1839.301758), SIMDE_FLOAT32_C(1952.901733), SIMDE_FLOAT32_C(90457.515625), SIMDE_FLOAT32_C(-91003.382812) } }, + { { SIMDE_FLOAT32_C( 605.85), SIMDE_FLOAT32_C( 244.27), SIMDE_FLOAT32_C( -426.53), SIMDE_FLOAT32_C( -969.18) }, + { SIMDE_FLOAT32_C( 322.13), SIMDE_FLOAT32_C( 863.77), SIMDE_FLOAT32_C( -685.35), SIMDE_FLOAT32_C( -710.70) }, + { SIMDE_FLOAT32_C( -791.12), SIMDE_FLOAT32_C( 373.53) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(683951.562500), SIMDE_FLOAT32_C(-683101.437500), SIMDE_FLOAT32_C(-562675.500000), SIMDE_FLOAT32_C(561279.812500) } }, + { { SIMDE_FLOAT32_C( -606.46), SIMDE_FLOAT32_C( -507.55), SIMDE_FLOAT32_C( -68.24), SIMDE_FLOAT32_C( -823.05) }, + { SIMDE_FLOAT32_C( -359.95), SIMDE_FLOAT32_C( 611.92), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -660.86) }, + { SIMDE_FLOAT32_C( 
181.81), SIMDE_FLOAT32_C( 115.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-111859.632812), SIMDE_FLOAT32_C(110745.617188), SIMDE_FLOAT32_C(120082.710938), SIMDE_FLOAT32_C(-120974.000000) } }, + { { SIMDE_FLOAT32_C( 951.84), SIMDE_FLOAT32_C( -864.96), SIMDE_FLOAT32_C( 970.59), SIMDE_FLOAT32_C( 769.97) }, + { SIMDE_FLOAT32_C( 609.64), SIMDE_FLOAT32_C( -580.87), SIMDE_FLOAT32_C( 358.59), SIMDE_FLOAT32_C( 350.56) }, + { SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 163.74) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(16234.529297), SIMDE_FLOAT32_C(-16147.649414), SIMDE_FLOAT32_C(-8252.643555), SIMDE_FLOAT32_C(9993.203125) } }, + { { SIMDE_FLOAT32_C( -636.68), SIMDE_FLOAT32_C( 554.68), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( -565.95) }, + { SIMDE_FLOAT32_C( 129.69), SIMDE_FLOAT32_C( 961.79), SIMDE_FLOAT32_C( -333.22), SIMDE_FLOAT32_C( 69.65) }, + { SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( 131.41) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-699819.562500), SIMDE_FLOAT32_C(699737.562500), SIMDE_FLOAT32_C(-51018.167969), SIMDE_FLOAT32_C(50066.816406) } }, + { { SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( 829.24), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( 562.33) }, + { SIMDE_FLOAT32_C( 515.59), SIMDE_FLOAT32_C( -290.69), SIMDE_FLOAT32_C( -816.53), SIMDE_FLOAT32_C( 17.39) }, + { SIMDE_FLOAT32_C( 268.14), SIMDE_FLOAT32_C( 729.88) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(77733.710938), SIMDE_FLOAT32_C(-77116.382812), SIMDE_FLOAT32_C(-5138.084473), SIMDE_FLOAT32_C(5225.284668) } }, + { { SIMDE_FLOAT32_C( -894.99), SIMDE_FLOAT32_C( 516.42), SIMDE_FLOAT32_C( -169.55), SIMDE_FLOAT32_C( 696.41) }, + { SIMDE_FLOAT32_C( -388.51), SIMDE_FLOAT32_C( 987.71), SIMDE_FLOAT32_C( -91.49), SIMDE_FLOAT32_C( -970.85) }, + { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( -28.01) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-353784.031250), SIMDE_FLOAT32_C(353405.468750), SIMDE_FLOAT32_C(346695.718750), SIMDE_FLOAT32_C(-346168.875000) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) 
{ simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcmlaq_rot90_lane_f32(r_, a, b, 0); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 0, 0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_lane_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -692,7 +528,8 @@ static int test_simde_vcmlaq_rot90_lane_f32(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot90_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float16_t r_[8]; @@ -701,167 +538,100 @@ static int test_simde_vcmlaq_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float16_t r[8]; } test_vec[] = { - {{SIMDE_FLOAT16_VALUE(-30.36), SIMDE_FLOAT16_VALUE(631.50), - SIMDE_FLOAT16_VALUE(851.00), SIMDE_FLOAT16_VALUE(-263.50), - SIMDE_FLOAT16_VALUE(140.00), SIMDE_FLOAT16_VALUE(859.00), - SIMDE_FLOAT16_VALUE(-834.50), SIMDE_FLOAT16_VALUE(216.12)}, - {SIMDE_FLOAT16_VALUE(996.00), SIMDE_FLOAT16_VALUE(529.50), - SIMDE_FLOAT16_VALUE(79.06), SIMDE_FLOAT16_VALUE(947.00), - SIMDE_FLOAT16_VALUE(122.00), SIMDE_FLOAT16_VALUE(-250.00), - SIMDE_FLOAT16_VALUE(-361.75), 
SIMDE_FLOAT16_VALUE(265.25)}, - {SIMDE_FLOAT16_VALUE(58.66), SIMDE_FLOAT16_VALUE(2.71), - SIMDE_FLOAT16_VALUE(99.81), SIMDE_FLOAT16_VALUE(-137.62), - SIMDE_FLOAT16_VALUE(-761.00), SIMDE_FLOAT16_VALUE(813.00), - SIMDE_FLOAT16_VALUE(-897.50), SIMDE_FLOAT16_VALUE(653.50)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-31088.00), SIMDE_FLOAT16_VALUE(31696.00), - SIMDE_FLOAT16_VALUE(-54688.00), SIMDE_FLOAT16_VALUE(55296.00), - SIMDE_FLOAT16_VALUE(14808.00), SIMDE_FLOAT16_VALUE(-13808.00), - SIMDE_FLOAT16_VALUE(-16400.00), SIMDE_FLOAT16_VALUE(15776.00)}}, - {{SIMDE_FLOAT16_VALUE(396.00), SIMDE_FLOAT16_VALUE(413.00), - SIMDE_FLOAT16_VALUE(514.00), SIMDE_FLOAT16_VALUE(-977.50), - SIMDE_FLOAT16_VALUE(-672.00), SIMDE_FLOAT16_VALUE(-92.12), - SIMDE_FLOAT16_VALUE(-441.25), SIMDE_FLOAT16_VALUE(-374.25)}, - {SIMDE_FLOAT16_VALUE(-152.00), SIMDE_FLOAT16_VALUE(-79.56), - SIMDE_FLOAT16_VALUE(-214.62), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(79.00), SIMDE_FLOAT16_VALUE(84.00), - SIMDE_FLOAT16_VALUE(493.00), SIMDE_FLOAT16_VALUE(-96.00)}, - {SIMDE_FLOAT16_VALUE(104.12), SIMDE_FLOAT16_VALUE(78.50), - SIMDE_FLOAT16_VALUE(171.50), SIMDE_FLOAT16_VALUE(-682.50), - SIMDE_FLOAT16_VALUE(217.12), SIMDE_FLOAT16_VALUE(49.34), - SIMDE_FLOAT16_VALUE(256.50), SIMDE_FLOAT16_VALUE(-92.06)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(6640.00), SIMDE_FLOAT16_VALUE(-5832.00), - SIMDE_FLOAT16_VALUE(48800.00), SIMDE_FLOAT16_VALUE(-49248.00), - SIMDE_FLOAT16_VALUE(-7264.00), SIMDE_FLOAT16_VALUE(6500.00), - SIMDE_FLOAT16_VALUE(7096.00), SIMDE_FLOAT16_VALUE(-7912.00)}}, - {{SIMDE_FLOAT16_VALUE(-728.00), SIMDE_FLOAT16_VALUE(-108.38), - SIMDE_FLOAT16_VALUE(-77.88), SIMDE_FLOAT16_VALUE(-353.00), - SIMDE_FLOAT16_VALUE(-239.00), SIMDE_FLOAT16_VALUE(704.50), - SIMDE_FLOAT16_VALUE(914.00), SIMDE_FLOAT16_VALUE(-211.12)}, - {SIMDE_FLOAT16_VALUE(-473.25), SIMDE_FLOAT16_VALUE(74.38), - SIMDE_FLOAT16_VALUE(904.50), SIMDE_FLOAT16_VALUE(-290.50), - SIMDE_FLOAT16_VALUE(-796.00), SIMDE_FLOAT16_VALUE(421.25), - 
SIMDE_FLOAT16_VALUE(215.75), SIMDE_FLOAT16_VALUE(249.38)}, - {SIMDE_FLOAT16_VALUE(-523.00), SIMDE_FLOAT16_VALUE(-720.00), - SIMDE_FLOAT16_VALUE(32.75), SIMDE_FLOAT16_VALUE(-487.75), - SIMDE_FLOAT16_VALUE(-705.50), SIMDE_FLOAT16_VALUE(-468.00), - SIMDE_FLOAT16_VALUE(-789.00), SIMDE_FLOAT16_VALUE(-866.00)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(-3164.00), SIMDE_FLOAT16_VALUE(2328.00), - SIMDE_FLOAT16_VALUE(9440.00), SIMDE_FLOAT16_VALUE(-9864.00), - SIMDE_FLOAT16_VALUE(-14032.00), SIMDE_FLOAT16_VALUE(14504.00), - SIMDE_FLOAT16_VALUE(-7252.00), SIMDE_FLOAT16_VALUE(7956.00)}}, - {{SIMDE_FLOAT16_VALUE(-891.50), SIMDE_FLOAT16_VALUE(-299.00), - SIMDE_FLOAT16_VALUE(-595.00), SIMDE_FLOAT16_VALUE(-662.00), - SIMDE_FLOAT16_VALUE(-914.00), SIMDE_FLOAT16_VALUE(674.50), - SIMDE_FLOAT16_VALUE(771.50), SIMDE_FLOAT16_VALUE(14.33)}, - {SIMDE_FLOAT16_VALUE(880.00), SIMDE_FLOAT16_VALUE(767.00), - SIMDE_FLOAT16_VALUE(-738.50), SIMDE_FLOAT16_VALUE(581.50), - SIMDE_FLOAT16_VALUE(-342.00), SIMDE_FLOAT16_VALUE(580.50), - SIMDE_FLOAT16_VALUE(534.00), SIMDE_FLOAT16_VALUE(-671.00)}, - {SIMDE_FLOAT16_VALUE(-482.75), SIMDE_FLOAT16_VALUE(382.25), - SIMDE_FLOAT16_VALUE(503.00), SIMDE_FLOAT16_VALUE(35.00), - SIMDE_FLOAT16_VALUE(315.50), SIMDE_FLOAT16_VALUE(-23.56), - SIMDE_FLOAT16_VALUE(53.88), SIMDE_FLOAT16_VALUE(722.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(-27744.00), SIMDE_FLOAT16_VALUE(26544.00), - SIMDE_FLOAT16_VALUE(-20944.00), SIMDE_FLOAT16_VALUE(19696.00), - SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE(20992.00), - SIMDE_FLOAT16_VALUE(24256.00), SIMDE_FLOAT16_VALUE(-23472.00)}}, - {{SIMDE_FLOAT16_VALUE(525.50), SIMDE_FLOAT16_VALUE(-679.00), - SIMDE_FLOAT16_VALUE(491.50), SIMDE_FLOAT16_VALUE(-505.00), - SIMDE_FLOAT16_VALUE(914.50), SIMDE_FLOAT16_VALUE(-312.00), - SIMDE_FLOAT16_VALUE(-404.50), SIMDE_FLOAT16_VALUE(-634.00)}, - {SIMDE_FLOAT16_VALUE(-86.62), SIMDE_FLOAT16_VALUE(-914.50), - SIMDE_FLOAT16_VALUE(-839.50), SIMDE_FLOAT16_VALUE(817.50), - SIMDE_FLOAT16_VALUE(-187.25), 
SIMDE_FLOAT16_VALUE(422.75), - SIMDE_FLOAT16_VALUE(604.50), SIMDE_FLOAT16_VALUE(-735.00)}, - {SIMDE_FLOAT16_VALUE(-21.31), SIMDE_FLOAT16_VALUE(-29.59), - SIMDE_FLOAT16_VALUE(-725.00), SIMDE_FLOAT16_VALUE(-503.00), - SIMDE_FLOAT16_VALUE(-75.56), SIMDE_FLOAT16_VALUE(215.38), - SIMDE_FLOAT16_VALUE(-742.00), SIMDE_FLOAT16_VALUE(-854.00)}, - INT32_C(0), - {SIMDE_FLOAT16_VALUE(-18960.00), SIMDE_FLOAT16_VALUE(18816.00), - SIMDE_FLOAT16_VALUE(17920.00), SIMDE_FLOAT16_VALUE(-17920.00), - SIMDE_FLOAT16_VALUE(9928.00), SIMDE_FLOAT16_VALUE(-9320.00), - SIMDE_FLOAT16_VALUE(-16072.00), SIMDE_FLOAT16_VALUE(15032.00)}}, - {{SIMDE_FLOAT16_VALUE(-618.50), SIMDE_FLOAT16_VALUE(164.88), - SIMDE_FLOAT16_VALUE(304.75), SIMDE_FLOAT16_VALUE(-530.50), - SIMDE_FLOAT16_VALUE(-603.50), SIMDE_FLOAT16_VALUE(730.50), - SIMDE_FLOAT16_VALUE(46.66), SIMDE_FLOAT16_VALUE(629.00)}, - {SIMDE_FLOAT16_VALUE(-537.00), SIMDE_FLOAT16_VALUE(637.00), - SIMDE_FLOAT16_VALUE(884.50), SIMDE_FLOAT16_VALUE(378.25), - SIMDE_FLOAT16_VALUE(-10.17), SIMDE_FLOAT16_VALUE(-730.00), - SIMDE_FLOAT16_VALUE(-981.50), SIMDE_FLOAT16_VALUE(453.25)}, - {SIMDE_FLOAT16_VALUE(-575.00), SIMDE_FLOAT16_VALUE(12.88), - SIMDE_FLOAT16_VALUE(-667.50), SIMDE_FLOAT16_VALUE(380.50), - SIMDE_FLOAT16_VALUE(374.75), SIMDE_FLOAT16_VALUE(-222.50), - SIMDE_FLOAT16_VALUE(206.88), SIMDE_FLOAT16_VALUE(502.25)}, - INT32_C(1), - {SIMDE_FLOAT16_VALUE(-8824.00), SIMDE_FLOAT16_VALUE(8368.00), - SIMDE_FLOAT16_VALUE(-4568.00), SIMDE_FLOAT16_VALUE(4344.00), - SIMDE_FLOAT16_VALUE(8800.00), SIMDE_FLOAT16_VALUE(-8672.00), - SIMDE_FLOAT16_VALUE(-5792.00), SIMDE_FLOAT16_VALUE(6468.00)}}, - {{SIMDE_FLOAT16_VALUE(-825.50), SIMDE_FLOAT16_VALUE(-472.75), - SIMDE_FLOAT16_VALUE(-531.00), SIMDE_FLOAT16_VALUE(-366.75), - SIMDE_FLOAT16_VALUE(143.12), SIMDE_FLOAT16_VALUE(698.50), - SIMDE_FLOAT16_VALUE(700.00), SIMDE_FLOAT16_VALUE(498.25)}, - {SIMDE_FLOAT16_VALUE(908.00), SIMDE_FLOAT16_VALUE(845.50), - SIMDE_FLOAT16_VALUE(-383.50), SIMDE_FLOAT16_VALUE(383.50), - 
SIMDE_FLOAT16_VALUE(357.75), SIMDE_FLOAT16_VALUE(-900.50), - SIMDE_FLOAT16_VALUE(-802.00), SIMDE_FLOAT16_VALUE(966.50)}, - {SIMDE_FLOAT16_VALUE(-993.00), SIMDE_FLOAT16_VALUE(477.50), - SIMDE_FLOAT16_VALUE(-23.00), SIMDE_FLOAT16_VALUE(102.38), - SIMDE_FLOAT16_VALUE(988.50), SIMDE_FLOAT16_VALUE(-311.75), - SIMDE_FLOAT16_VALUE(-668.50), SIMDE_FLOAT16_VALUE(148.25)}, - INT32_C(2), - {SIMDE_FLOAT16_VALUE(18624.00), SIMDE_FLOAT16_VALUE(-19920.00), - SIMDE_FLOAT16_VALUE(8288.00), SIMDE_FLOAT16_VALUE(-9184.00), - SIMDE_FLOAT16_VALUE(-20576.00), SIMDE_FLOAT16_VALUE(21408.00), - SIMDE_FLOAT16_VALUE(22928.00), SIMDE_FLOAT16_VALUE(-21728.00)}}, - {{SIMDE_FLOAT16_VALUE(213.88), SIMDE_FLOAT16_VALUE(337.75), - SIMDE_FLOAT16_VALUE(330.50), SIMDE_FLOAT16_VALUE(-88.56), - SIMDE_FLOAT16_VALUE(191.12), SIMDE_FLOAT16_VALUE(-615.00), - SIMDE_FLOAT16_VALUE(-113.25), SIMDE_FLOAT16_VALUE(775.50)}, - {SIMDE_FLOAT16_VALUE(295.50), SIMDE_FLOAT16_VALUE(687.00), - SIMDE_FLOAT16_VALUE(406.25), SIMDE_FLOAT16_VALUE(439.50), - SIMDE_FLOAT16_VALUE(-827.50), SIMDE_FLOAT16_VALUE(733.00), - SIMDE_FLOAT16_VALUE(499.00), SIMDE_FLOAT16_VALUE(931.00)}, - {SIMDE_FLOAT16_VALUE(790.00), SIMDE_FLOAT16_VALUE(-979.00), - SIMDE_FLOAT16_VALUE(70.62), SIMDE_FLOAT16_VALUE(-47.00), - SIMDE_FLOAT16_VALUE(228.50), SIMDE_FLOAT16_VALUE(-233.50), - SIMDE_FLOAT16_VALUE(-467.50), SIMDE_FLOAT16_VALUE(545.00)}, - INT32_C(3), - {SIMDE_FLOAT16_VALUE(32496.00), SIMDE_FLOAT16_VALUE(-31952.00), - SIMDE_FLOAT16_VALUE(20992.00), SIMDE_FLOAT16_VALUE(-20752.00), - SIMDE_FLOAT16_VALUE(34656.00), SIMDE_FLOAT16_VALUE(-35072.00), - SIMDE_FLOAT16_VALUE(43648.00), SIMDE_FLOAT16_VALUE(-42976.00)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT16_VALUE( -30.36), SIMDE_FLOAT16_VALUE( 631.50), SIMDE_FLOAT16_VALUE( 851.00), SIMDE_FLOAT16_VALUE( -263.50), + SIMDE_FLOAT16_VALUE( 140.00), SIMDE_FLOAT16_VALUE( 859.00), SIMDE_FLOAT16_VALUE( -834.50), SIMDE_FLOAT16_VALUE( 216.12) }, + { 
SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 529.50), SIMDE_FLOAT16_VALUE( 79.06), SIMDE_FLOAT16_VALUE( 947.00), + SIMDE_FLOAT16_VALUE( 122.00), SIMDE_FLOAT16_VALUE( -250.00), SIMDE_FLOAT16_VALUE( -361.75), SIMDE_FLOAT16_VALUE( 265.25) }, + { SIMDE_FLOAT16_VALUE( 58.66), SIMDE_FLOAT16_VALUE( 2.71), SIMDE_FLOAT16_VALUE( 99.81), SIMDE_FLOAT16_VALUE( -137.62), + SIMDE_FLOAT16_VALUE( -761.00), SIMDE_FLOAT16_VALUE( 813.00), SIMDE_FLOAT16_VALUE( -897.50), SIMDE_FLOAT16_VALUE( 653.50) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-31088.00), SIMDE_FLOAT16_VALUE( 31696.00), SIMDE_FLOAT16_VALUE(-54688.00), SIMDE_FLOAT16_VALUE( 55296.00), + SIMDE_FLOAT16_VALUE( 14808.00), SIMDE_FLOAT16_VALUE(-13808.00), SIMDE_FLOAT16_VALUE(-16400.00), SIMDE_FLOAT16_VALUE( 15776.00) } }, + { { SIMDE_FLOAT16_VALUE( 396.00), SIMDE_FLOAT16_VALUE( 413.00), SIMDE_FLOAT16_VALUE( 514.00), SIMDE_FLOAT16_VALUE( -977.50), + SIMDE_FLOAT16_VALUE( -672.00), SIMDE_FLOAT16_VALUE( -92.12), SIMDE_FLOAT16_VALUE( -441.25), SIMDE_FLOAT16_VALUE( -374.25) }, + { SIMDE_FLOAT16_VALUE( -152.00), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -214.62), SIMDE_FLOAT16_VALUE( -615.00), + SIMDE_FLOAT16_VALUE( 79.00), SIMDE_FLOAT16_VALUE( 84.00), SIMDE_FLOAT16_VALUE( 493.00), SIMDE_FLOAT16_VALUE( -96.00) }, + { SIMDE_FLOAT16_VALUE( 104.12), SIMDE_FLOAT16_VALUE( 78.50), SIMDE_FLOAT16_VALUE( 171.50), SIMDE_FLOAT16_VALUE( -682.50), + SIMDE_FLOAT16_VALUE( 217.12), SIMDE_FLOAT16_VALUE( 49.34), SIMDE_FLOAT16_VALUE( 256.50), SIMDE_FLOAT16_VALUE( -92.06) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( 6640.00), SIMDE_FLOAT16_VALUE( -5832.00), SIMDE_FLOAT16_VALUE( 48800.00), SIMDE_FLOAT16_VALUE(-49248.00), + SIMDE_FLOAT16_VALUE( -7264.00), SIMDE_FLOAT16_VALUE( 6500.00), SIMDE_FLOAT16_VALUE( 7096.00), SIMDE_FLOAT16_VALUE( -7912.00) } }, + { { SIMDE_FLOAT16_VALUE( -728.00), SIMDE_FLOAT16_VALUE( -108.38), SIMDE_FLOAT16_VALUE( -77.88), SIMDE_FLOAT16_VALUE( -353.00), + SIMDE_FLOAT16_VALUE( -239.00), SIMDE_FLOAT16_VALUE( 704.50), 
SIMDE_FLOAT16_VALUE( 914.00), SIMDE_FLOAT16_VALUE( -211.12) }, + { SIMDE_FLOAT16_VALUE( -473.25), SIMDE_FLOAT16_VALUE( 74.38), SIMDE_FLOAT16_VALUE( 904.50), SIMDE_FLOAT16_VALUE( -290.50), + SIMDE_FLOAT16_VALUE( -796.00), SIMDE_FLOAT16_VALUE( 421.25), SIMDE_FLOAT16_VALUE( 215.75), SIMDE_FLOAT16_VALUE( 249.38) }, + { SIMDE_FLOAT16_VALUE( -523.00), SIMDE_FLOAT16_VALUE( -720.00), SIMDE_FLOAT16_VALUE( 32.75), SIMDE_FLOAT16_VALUE( -487.75), + SIMDE_FLOAT16_VALUE( -705.50), SIMDE_FLOAT16_VALUE( -468.00), SIMDE_FLOAT16_VALUE( -789.00), SIMDE_FLOAT16_VALUE( -866.00) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( -3164.00), SIMDE_FLOAT16_VALUE( 2328.00), SIMDE_FLOAT16_VALUE( 9440.00), SIMDE_FLOAT16_VALUE( -9864.00), + SIMDE_FLOAT16_VALUE(-14032.00), SIMDE_FLOAT16_VALUE( 14504.00), SIMDE_FLOAT16_VALUE( -7252.00), SIMDE_FLOAT16_VALUE( 7956.00) } }, + { { SIMDE_FLOAT16_VALUE( -891.50), SIMDE_FLOAT16_VALUE( -299.00), SIMDE_FLOAT16_VALUE( -595.00), SIMDE_FLOAT16_VALUE( -662.00), + SIMDE_FLOAT16_VALUE( -914.00), SIMDE_FLOAT16_VALUE( 674.50), SIMDE_FLOAT16_VALUE( 771.50), SIMDE_FLOAT16_VALUE( 14.33) }, + { SIMDE_FLOAT16_VALUE( 880.00), SIMDE_FLOAT16_VALUE( 767.00), SIMDE_FLOAT16_VALUE( -738.50), SIMDE_FLOAT16_VALUE( 581.50), + SIMDE_FLOAT16_VALUE( -342.00), SIMDE_FLOAT16_VALUE( 580.50), SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( -671.00) }, + { SIMDE_FLOAT16_VALUE( -482.75), SIMDE_FLOAT16_VALUE( 382.25), SIMDE_FLOAT16_VALUE( 503.00), SIMDE_FLOAT16_VALUE( 35.00), + SIMDE_FLOAT16_VALUE( 315.50), SIMDE_FLOAT16_VALUE( -23.56), SIMDE_FLOAT16_VALUE( 53.88), SIMDE_FLOAT16_VALUE( 722.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE(-27744.00), SIMDE_FLOAT16_VALUE( 26544.00), SIMDE_FLOAT16_VALUE(-20944.00), SIMDE_FLOAT16_VALUE( 19696.00), + SIMDE_FLOAT16_VALUE(-21232.00), SIMDE_FLOAT16_VALUE( 20992.00), SIMDE_FLOAT16_VALUE( 24256.00), SIMDE_FLOAT16_VALUE(-23472.00) } }, + { { SIMDE_FLOAT16_VALUE( 525.50), SIMDE_FLOAT16_VALUE( -679.00), SIMDE_FLOAT16_VALUE( 491.50), SIMDE_FLOAT16_VALUE( 
-505.00), + SIMDE_FLOAT16_VALUE( 914.50), SIMDE_FLOAT16_VALUE( -312.00), SIMDE_FLOAT16_VALUE( -404.50), SIMDE_FLOAT16_VALUE( -634.00) }, + { SIMDE_FLOAT16_VALUE( -86.62), SIMDE_FLOAT16_VALUE( -914.50), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 817.50), + SIMDE_FLOAT16_VALUE( -187.25), SIMDE_FLOAT16_VALUE( 422.75), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( -735.00) }, + { SIMDE_FLOAT16_VALUE( -21.31), SIMDE_FLOAT16_VALUE( -29.59), SIMDE_FLOAT16_VALUE( -725.00), SIMDE_FLOAT16_VALUE( -503.00), + SIMDE_FLOAT16_VALUE( -75.56), SIMDE_FLOAT16_VALUE( 215.38), SIMDE_FLOAT16_VALUE( -742.00), SIMDE_FLOAT16_VALUE( -854.00) }, + INT32_C( 0), + { SIMDE_FLOAT16_VALUE(-18960.00), SIMDE_FLOAT16_VALUE( 18816.00), SIMDE_FLOAT16_VALUE( 17920.00), SIMDE_FLOAT16_VALUE(-17920.00), + SIMDE_FLOAT16_VALUE( 9928.00), SIMDE_FLOAT16_VALUE( -9320.00), SIMDE_FLOAT16_VALUE(-16072.00), SIMDE_FLOAT16_VALUE( 15032.00) } }, + { { SIMDE_FLOAT16_VALUE( -618.50), SIMDE_FLOAT16_VALUE( 164.88), SIMDE_FLOAT16_VALUE( 304.75), SIMDE_FLOAT16_VALUE( -530.50), + SIMDE_FLOAT16_VALUE( -603.50), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 46.66), SIMDE_FLOAT16_VALUE( 629.00) }, + { SIMDE_FLOAT16_VALUE( -537.00), SIMDE_FLOAT16_VALUE( 637.00), SIMDE_FLOAT16_VALUE( 884.50), SIMDE_FLOAT16_VALUE( 378.25), + SIMDE_FLOAT16_VALUE( -10.17), SIMDE_FLOAT16_VALUE( -730.00), SIMDE_FLOAT16_VALUE( -981.50), SIMDE_FLOAT16_VALUE( 453.25) }, + { SIMDE_FLOAT16_VALUE( -575.00), SIMDE_FLOAT16_VALUE( 12.88), SIMDE_FLOAT16_VALUE( -667.50), SIMDE_FLOAT16_VALUE( 380.50), + SIMDE_FLOAT16_VALUE( 374.75), SIMDE_FLOAT16_VALUE( -222.50), SIMDE_FLOAT16_VALUE( 206.88), SIMDE_FLOAT16_VALUE( 502.25) }, + INT32_C( 1), + { SIMDE_FLOAT16_VALUE( -8824.00), SIMDE_FLOAT16_VALUE( 8368.00), SIMDE_FLOAT16_VALUE( -4568.00), SIMDE_FLOAT16_VALUE( 4344.00), + SIMDE_FLOAT16_VALUE( 8800.00), SIMDE_FLOAT16_VALUE( -8672.00), SIMDE_FLOAT16_VALUE( -5792.00), SIMDE_FLOAT16_VALUE( 6468.00) } }, + { { SIMDE_FLOAT16_VALUE( -825.50), 
SIMDE_FLOAT16_VALUE( -472.75), SIMDE_FLOAT16_VALUE( -531.00), SIMDE_FLOAT16_VALUE( -366.75), + SIMDE_FLOAT16_VALUE( 143.12), SIMDE_FLOAT16_VALUE( 698.50), SIMDE_FLOAT16_VALUE( 700.00), SIMDE_FLOAT16_VALUE( 498.25) }, + { SIMDE_FLOAT16_VALUE( 908.00), SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -383.50), SIMDE_FLOAT16_VALUE( 383.50), + SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( -900.50), SIMDE_FLOAT16_VALUE( -802.00), SIMDE_FLOAT16_VALUE( 966.50) }, + { SIMDE_FLOAT16_VALUE( -993.00), SIMDE_FLOAT16_VALUE( 477.50), SIMDE_FLOAT16_VALUE( -23.00), SIMDE_FLOAT16_VALUE( 102.38), + SIMDE_FLOAT16_VALUE( 988.50), SIMDE_FLOAT16_VALUE( -311.75), SIMDE_FLOAT16_VALUE( -668.50), SIMDE_FLOAT16_VALUE( 148.25) }, + INT32_C( 2), + { SIMDE_FLOAT16_VALUE( 18624.00), SIMDE_FLOAT16_VALUE(-19920.00), SIMDE_FLOAT16_VALUE( 8288.00), SIMDE_FLOAT16_VALUE( -9184.00), + SIMDE_FLOAT16_VALUE(-20576.00), SIMDE_FLOAT16_VALUE( 21408.00), SIMDE_FLOAT16_VALUE( 22928.00), SIMDE_FLOAT16_VALUE(-21728.00) } }, + { { SIMDE_FLOAT16_VALUE( 213.88), SIMDE_FLOAT16_VALUE( 337.75), SIMDE_FLOAT16_VALUE( 330.50), SIMDE_FLOAT16_VALUE( -88.56), + SIMDE_FLOAT16_VALUE( 191.12), SIMDE_FLOAT16_VALUE( -615.00), SIMDE_FLOAT16_VALUE( -113.25), SIMDE_FLOAT16_VALUE( 775.50) }, + { SIMDE_FLOAT16_VALUE( 295.50), SIMDE_FLOAT16_VALUE( 687.00), SIMDE_FLOAT16_VALUE( 406.25), SIMDE_FLOAT16_VALUE( 439.50), + SIMDE_FLOAT16_VALUE( -827.50), SIMDE_FLOAT16_VALUE( 733.00), SIMDE_FLOAT16_VALUE( 499.00), SIMDE_FLOAT16_VALUE( 931.00) }, + { SIMDE_FLOAT16_VALUE( 790.00), SIMDE_FLOAT16_VALUE( -979.00), SIMDE_FLOAT16_VALUE( 70.62), SIMDE_FLOAT16_VALUE( -47.00), + SIMDE_FLOAT16_VALUE( 228.50), SIMDE_FLOAT16_VALUE( -233.50), SIMDE_FLOAT16_VALUE( -467.50), SIMDE_FLOAT16_VALUE( 545.00) }, + INT32_C( 3), + { SIMDE_FLOAT16_VALUE( 32496.00), SIMDE_FLOAT16_VALUE(-31952.00), SIMDE_FLOAT16_VALUE( 20992.00), SIMDE_FLOAT16_VALUE(-20752.00), + SIMDE_FLOAT16_VALUE( 34656.00), SIMDE_FLOAT16_VALUE(-35072.00), SIMDE_FLOAT16_VALUE( 43648.00), 
SIMDE_FLOAT16_VALUE(-42976.00) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float16x8_t r_ = simde_vld1q_f16(test_vec[i].r_); simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_4_( - simde_vcmlaq_rot90_laneq_f16, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_4_(simde_vcmlaq_rot90_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 2, 3, 0, 1, 2, 3}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; + for (int i = 0 ; i < 8 ; i++) { simde_float16x8_t r_ = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-1000.0f, 1000.0f); simde_float16x8_t r = simde_vcmlaq_laneq_f16(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f16x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); @@ -872,7 +642,8 @@ static int test_simde_vcmlaq_rot90_laneq_f16(SIMDE_MUNIT_TEST_ARGS) { #endif } -static int test_simde_vcmlaq_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { +static int +test_simde_vcmlaq_rot90_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t r_[4]; @@ -881,103 +652,68 @@ static int test_simde_vcmlaq_rot90_laneq_f32(SIMDE_MUNIT_TEST_ARGS) { const int lane; simde_float32_t r[4]; } test_vec[] = { - {{SIMDE_FLOAT32_C(355.18), SIMDE_FLOAT32_C(169.63), - SIMDE_FLOAT32_C(116.87), 
SIMDE_FLOAT32_C(-467.19)}, - {SIMDE_FLOAT32_C(-513.94), SIMDE_FLOAT32_C(-999.71), - SIMDE_FLOAT32_C(-285.25), SIMDE_FLOAT32_C(-931.88)}, - {SIMDE_FLOAT32_C(-839.57), SIMDE_FLOAT32_C(-681.40), - SIMDE_FLOAT32_C(-117.60), SIMDE_FLOAT32_C(-459.86)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-838971.375000), SIMDE_FLOAT32_C(839496.187500), - SIMDE_FLOAT32_C(-782261.625000), SIMDE_FLOAT32_C(781911.312500)}}, - {{SIMDE_FLOAT32_C(-57.67), SIMDE_FLOAT32_C(-897.07), - SIMDE_FLOAT32_C(118.98), SIMDE_FLOAT32_C(-387.92)}, - {SIMDE_FLOAT32_C(-362.79), SIMDE_FLOAT32_C(160.99), - SIMDE_FLOAT32_C(-2.72), SIMDE_FLOAT32_C(206.65)}, - {SIMDE_FLOAT32_C(49.34), SIMDE_FLOAT32_C(511.85), - SIMDE_FLOAT32_C(547.20), SIMDE_FLOAT32_C(-119.58)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-82460.406250), SIMDE_FLOAT32_C(81505.664062), - SIMDE_FLOAT32_C(-105654.820312), SIMDE_FLOAT32_C(105385.882812)}}, - {{SIMDE_FLOAT32_C(-219.54), SIMDE_FLOAT32_C(-959.14), - SIMDE_FLOAT32_C(943.92), SIMDE_FLOAT32_C(628.48)}, - {SIMDE_FLOAT32_C(446.65), SIMDE_FLOAT32_C(-500.77), - SIMDE_FLOAT32_C(-347.79), SIMDE_FLOAT32_C(813.11)}, - {SIMDE_FLOAT32_C(-542.25), SIMDE_FLOAT32_C(232.48), - SIMDE_FLOAT32_C(684.35), SIMDE_FLOAT32_C(710.26)}, - INT32_C(0), - {SIMDE_FLOAT32_C(-271762.062500), SIMDE_FLOAT32_C(270583.375000), - SIMDE_FLOAT32_C(441852.812500), SIMDE_FLOAT32_C(-440280.406250)}}, - {{SIMDE_FLOAT32_C(783.09), SIMDE_FLOAT32_C(-727.02), - SIMDE_FLOAT32_C(-586.46), SIMDE_FLOAT32_C(64.33)}, - {SIMDE_FLOAT32_C(-490.08), SIMDE_FLOAT32_C(740.49), - SIMDE_FLOAT32_C(-591.56), SIMDE_FLOAT32_C(-759.78)}, - {SIMDE_FLOAT32_C(-380.84), SIMDE_FLOAT32_C(993.01), - SIMDE_FLOAT32_C(-759.56), SIMDE_FLOAT32_C(861.16)}, - INT32_C(1), - {SIMDE_FLOAT32_C(-734530.875000), SIMDE_FLOAT32_C(734586.937500), - SIMDE_FLOAT32_C(753882.687500), SIMDE_FLOAT32_C(-754404.875000)}}, - {{SIMDE_FLOAT32_C(998.31), SIMDE_FLOAT32_C(538.40), - SIMDE_FLOAT32_C(-191.12), SIMDE_FLOAT32_C(-434.48)}, - {SIMDE_FLOAT32_C(592.83), SIMDE_FLOAT32_C(820.32), - 
SIMDE_FLOAT32_C(-296.84), SIMDE_FLOAT32_C(-612.30)}, - {SIMDE_FLOAT32_C(-552.34), SIMDE_FLOAT32_C(329.08), - SIMDE_FLOAT32_C(765.26), SIMDE_FLOAT32_C(-531.08)}, - INT32_C(0), - {SIMDE_FLOAT32_C(454093.875000), SIMDE_FLOAT32_C(-452557.187500), - SIMDE_FLOAT32_C(-338388.906250), SIMDE_FLOAT32_C(337763.312500)}}, - {{SIMDE_FLOAT32_C(52.61), SIMDE_FLOAT32_C(606.93), - SIMDE_FLOAT32_C(-894.22), SIMDE_FLOAT32_C(-854.38)}, - {SIMDE_FLOAT32_C(972.80), SIMDE_FLOAT32_C(-807.39), - SIMDE_FLOAT32_C(668.59), SIMDE_FLOAT32_C(-228.19)}, - {SIMDE_FLOAT32_C(-528.51), SIMDE_FLOAT32_C(730.93), - SIMDE_FLOAT32_C(-230.95), SIMDE_FLOAT32_C(-140.17)}, - INT32_C(1), - {SIMDE_FLOAT32_C(590198.187500), SIMDE_FLOAT32_C(-589538.625000), - SIMDE_FLOAT32_C(165896.703125), SIMDE_FLOAT32_C(-167645.296875)}}, - {{SIMDE_FLOAT32_C(556.73), SIMDE_FLOAT32_C(-701.90), - SIMDE_FLOAT32_C(-356.50), SIMDE_FLOAT32_C(-532.42)}, - {SIMDE_FLOAT32_C(856.94), SIMDE_FLOAT32_C(-261.67), - SIMDE_FLOAT32_C(-208.07), SIMDE_FLOAT32_C(27.93)}, - {SIMDE_FLOAT32_C(924.32), SIMDE_FLOAT32_C(-863.60), - SIMDE_FLOAT32_C(-687.65), SIMDE_FLOAT32_C(238.39)}, - INT32_C(0), - {SIMDE_FLOAT32_C(242423.562500), SIMDE_FLOAT32_C(-242568.734375), - SIMDE_FLOAT32_C(-26172.757812), SIMDE_FLOAT32_C(25283.837891)}}, - {{SIMDE_FLOAT32_C(-286.79), SIMDE_FLOAT32_C(630.61), - SIMDE_FLOAT32_C(-989.22), SIMDE_FLOAT32_C(223.21)}, - {SIMDE_FLOAT32_C(812.31), SIMDE_FLOAT32_C(667.33), - SIMDE_FLOAT32_C(841.41), SIMDE_FLOAT32_C(735.52)}, - {SIMDE_FLOAT32_C(308.52), SIMDE_FLOAT32_C(-189.06), - SIMDE_FLOAT32_C(-63.33), SIMDE_FLOAT32_C(837.76)}, - INT32_C(1), - {SIMDE_FLOAT32_C(125878.625000), SIMDE_FLOAT32_C(-125534.804688), - SIMDE_FLOAT32_C(138068.187500), SIMDE_FLOAT32_C(-138834.203125)}}}; - - for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + { { SIMDE_FLOAT32_C( 355.18), SIMDE_FLOAT32_C( 169.63), SIMDE_FLOAT32_C( 116.87), SIMDE_FLOAT32_C( -467.19) }, + { SIMDE_FLOAT32_C( -513.94), SIMDE_FLOAT32_C( -999.71), SIMDE_FLOAT32_C( 
-285.25), SIMDE_FLOAT32_C( -931.88) }, + { SIMDE_FLOAT32_C( -839.57), SIMDE_FLOAT32_C( -681.40), SIMDE_FLOAT32_C( -117.60), SIMDE_FLOAT32_C( -459.86) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-838971.375000), SIMDE_FLOAT32_C(839496.187500), SIMDE_FLOAT32_C(-782261.625000), SIMDE_FLOAT32_C(781911.312500) } }, + { { SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -897.07), SIMDE_FLOAT32_C( 118.98), SIMDE_FLOAT32_C( -387.92) }, + { SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( 160.99), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 206.65) }, + { SIMDE_FLOAT32_C( 49.34), SIMDE_FLOAT32_C( 511.85), SIMDE_FLOAT32_C( 547.20), SIMDE_FLOAT32_C( -119.58) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-82460.406250), SIMDE_FLOAT32_C(81505.664062), SIMDE_FLOAT32_C(-105654.820312), SIMDE_FLOAT32_C(105385.882812) } }, + { { SIMDE_FLOAT32_C( -219.54), SIMDE_FLOAT32_C( -959.14), SIMDE_FLOAT32_C( 943.92), SIMDE_FLOAT32_C( 628.48) }, + { SIMDE_FLOAT32_C( 446.65), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( -347.79), SIMDE_FLOAT32_C( 813.11) }, + { SIMDE_FLOAT32_C( -542.25), SIMDE_FLOAT32_C( 232.48), SIMDE_FLOAT32_C( 684.35), SIMDE_FLOAT32_C( 710.26) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(-271762.062500), SIMDE_FLOAT32_C(270583.375000), SIMDE_FLOAT32_C(441852.812500), SIMDE_FLOAT32_C(-440280.406250) } }, + { { SIMDE_FLOAT32_C( 783.09), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( -586.46), SIMDE_FLOAT32_C( 64.33) }, + { SIMDE_FLOAT32_C( -490.08), SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -759.78) }, + { SIMDE_FLOAT32_C( -380.84), SIMDE_FLOAT32_C( 993.01), SIMDE_FLOAT32_C( -759.56), SIMDE_FLOAT32_C( 861.16) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(-734530.875000), SIMDE_FLOAT32_C(734586.937500), SIMDE_FLOAT32_C(753882.687500), SIMDE_FLOAT32_C(-754404.875000) } }, + { { SIMDE_FLOAT32_C( 998.31), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -191.12), SIMDE_FLOAT32_C( -434.48) }, + { SIMDE_FLOAT32_C( 592.83), SIMDE_FLOAT32_C( 820.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( -612.30) }, + 
{ SIMDE_FLOAT32_C( -552.34), SIMDE_FLOAT32_C( 329.08), SIMDE_FLOAT32_C( 765.26), SIMDE_FLOAT32_C( -531.08) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(454093.875000), SIMDE_FLOAT32_C(-452557.187500), SIMDE_FLOAT32_C(-338388.906250), SIMDE_FLOAT32_C(337763.312500) } }, + { { SIMDE_FLOAT32_C( 52.61), SIMDE_FLOAT32_C( 606.93), SIMDE_FLOAT32_C( -894.22), SIMDE_FLOAT32_C( -854.38) }, + { SIMDE_FLOAT32_C( 972.80), SIMDE_FLOAT32_C( -807.39), SIMDE_FLOAT32_C( 668.59), SIMDE_FLOAT32_C( -228.19) }, + { SIMDE_FLOAT32_C( -528.51), SIMDE_FLOAT32_C( 730.93), SIMDE_FLOAT32_C( -230.95), SIMDE_FLOAT32_C( -140.17) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(590198.187500), SIMDE_FLOAT32_C(-589538.625000), SIMDE_FLOAT32_C(165896.703125), SIMDE_FLOAT32_C(-167645.296875) } }, + { { SIMDE_FLOAT32_C( 556.73), SIMDE_FLOAT32_C( -701.90), SIMDE_FLOAT32_C( -356.50), SIMDE_FLOAT32_C( -532.42) }, + { SIMDE_FLOAT32_C( 856.94), SIMDE_FLOAT32_C( -261.67), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 27.93) }, + { SIMDE_FLOAT32_C( 924.32), SIMDE_FLOAT32_C( -863.60), SIMDE_FLOAT32_C( -687.65), SIMDE_FLOAT32_C( 238.39) }, + INT32_C( 0), + { SIMDE_FLOAT32_C(242423.562500), SIMDE_FLOAT32_C(-242568.734375), SIMDE_FLOAT32_C(-26172.757812), SIMDE_FLOAT32_C(25283.837891) } }, + { { SIMDE_FLOAT32_C( -286.79), SIMDE_FLOAT32_C( 630.61), SIMDE_FLOAT32_C( -989.22), SIMDE_FLOAT32_C( 223.21) }, + { SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 667.33), SIMDE_FLOAT32_C( 841.41), SIMDE_FLOAT32_C( 735.52) }, + { SIMDE_FLOAT32_C( 308.52), SIMDE_FLOAT32_C( -189.06), SIMDE_FLOAT32_C( -63.33), SIMDE_FLOAT32_C( 837.76) }, + INT32_C( 1), + { SIMDE_FLOAT32_C(125878.625000), SIMDE_FLOAT32_C(-125534.804688), SIMDE_FLOAT32_C(138068.187500), SIMDE_FLOAT32_C(-138834.203125) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t r_ = simde_vld1q_f32(test_vec[i].r_); simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; - 
SIMDE_CONSTIFY_2_( - simde_vcmlaq_rot90_laneq_f32, r, - (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), - test_vec[i].lane, r_, a, b); + SIMDE_CONSTIFY_2_(simde_vcmlaq_rot90_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, r_, a, b); - simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), - 1); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } - return 0; #else fputc('\n', stdout); - const int lanes[] = {0, 1, 0, 1, 0, 1, 0, 1}; - for (int i = 0; i < 8; i++) { + const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t r_ = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcmlaq_rot90_laneq_f32(r_, a, b, lanes[i]); + simde_test_arm_neon_write_f32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);