diff --git a/meson.build b/meson.build index cb25fc094..5b211ba07 100644 --- a/meson.build +++ b/meson.build @@ -356,6 +356,7 @@ simde_avx512_families = [ 'fixupimm_round', 'flushsubnormal', 'fmadd', + 'fmaddsub', 'fmsub', 'fnmadd', 'fnmsub', diff --git a/simde/x86/avx512.h b/simde/x86/avx512.h index 103b46661..e3654bc37 100644 --- a/simde/x86/avx512.h +++ b/simde/x86/avx512.h @@ -69,6 +69,7 @@ #include "avx512/fixupimm_round.h" #include "avx512/flushsubnormal.h" #include "avx512/fmadd.h" +#include "avx512/fmaddsub.h" #include "avx512/fmsub.h" #include "avx512/fnmadd.h" #include "avx512/fnmsub.h" diff --git a/simde/x86/avx512/fmaddsub.h b/simde/x86/avx512/fmaddsub.h new file mode 100644 index 000000000..f1139e4d6 --- /dev/null +++ b/simde/x86/avx512/fmaddsub.h @@ -0,0 +1,91 @@ +#if !defined(SIMDE_X86_AVX512_FMADDSUB_H) +#define SIMDE_X86_AVX512_FMADDSUB_H + +#include "types.h" +#include "../fma.h" +#include "mul.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_addsub_pd (simde__m512d a, simde__m512d b) { + // mm512_addsub_pd does not exist as an intrinsic; this simde_x_-only utility (no native alias) subtracts b from a in even-indexed f64 lanes and adds b to a in odd-indexed f64 lanes, as spelled out by the scalar loop in the #else branch + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_addsub_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_addsub_pd(a_.m256d[1], b_.m256d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m512d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_addsub_ps (simde__m512 a, simde__m512 b) { + // mm512_addsub_ps does not exist as an intrinsic; this simde_x_-only utility (no native alias) subtracts b from a in even-indexed f32 lanes and adds b to a in odd-indexed f32 lanes, as spelled out by the scalar loop in the #else branch + simde__m512_private + r_, 
+ a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256[0] = simde_mm256_addsub_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_addsub_ps(a_.m256[1], b_.m256[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m512_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fmaddsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmaddsub_pd(a, b, c); + #else + return simde_x_mm512_addsub_pd(simde_mm512_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmaddsub_pd + #define _mm512_fmaddsub_pd(a, b, c) simde_mm512_fmaddsub_pd(a, b, c) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fmaddsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmaddsub_ps(a, b, c); + #else + return simde_x_mm512_addsub_ps(simde_mm512_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmaddsub_ps + #define _mm512_fmaddsub_ps(a, b, c) simde_mm512_fmaddsub_ps(a, b, c) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FMADDSUB_H) */ diff --git a/test/x86/avx512/fmaddsub.c b/test/x86/avx512/fmaddsub.c new file mode 100644 index 000000000..9aa2ee741 --- /dev/null +++ b/test/x86/avx512/fmaddsub.c @@ -0,0 +1,431 @@ +#include "test/test.h" +#define SIMDE_TEST_X86_AVX512_INSN fmaddsub + +#include +#include + +static int +test_simde_x_mm512_addsub_pd (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + const simde_float64 a[8]; + const simde_float64 b[8]; + const simde_float64 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -988.25), SIMDE_FLOAT64_C( 740.20), 
SIMDE_FLOAT64_C( 526.01), SIMDE_FLOAT64_C( -750.17), + SIMDE_FLOAT64_C( 752.29), SIMDE_FLOAT64_C( 598.86), SIMDE_FLOAT64_C( -890.37), SIMDE_FLOAT64_C( -96.80) }, + { SIMDE_FLOAT64_C( -562.73), SIMDE_FLOAT64_C( -919.61), SIMDE_FLOAT64_C( -409.96), SIMDE_FLOAT64_C( -333.44), + SIMDE_FLOAT64_C( 759.04), SIMDE_FLOAT64_C( 814.32), SIMDE_FLOAT64_C( -14.88), SIMDE_FLOAT64_C( 880.46) }, + { SIMDE_FLOAT64_C( -425.52), SIMDE_FLOAT64_C( -179.41), SIMDE_FLOAT64_C( 935.97), SIMDE_FLOAT64_C( -1083.61), + SIMDE_FLOAT64_C( -6.75), SIMDE_FLOAT64_C( 1413.18), SIMDE_FLOAT64_C( -875.49), SIMDE_FLOAT64_C( 783.66) } }, + { { SIMDE_FLOAT64_C( 355.05), SIMDE_FLOAT64_C( 639.68), SIMDE_FLOAT64_C( -612.70), SIMDE_FLOAT64_C( -466.72), + SIMDE_FLOAT64_C( 417.36), SIMDE_FLOAT64_C( 529.58), SIMDE_FLOAT64_C( 432.00), SIMDE_FLOAT64_C( 272.29) }, + { SIMDE_FLOAT64_C( 644.25), SIMDE_FLOAT64_C( -41.32), SIMDE_FLOAT64_C( 47.37), SIMDE_FLOAT64_C( 624.85), + SIMDE_FLOAT64_C( -900.74), SIMDE_FLOAT64_C( -25.58), SIMDE_FLOAT64_C( 352.65), SIMDE_FLOAT64_C( -889.00) }, + { SIMDE_FLOAT64_C( -289.20), SIMDE_FLOAT64_C( 598.36), SIMDE_FLOAT64_C( -660.07), SIMDE_FLOAT64_C( 158.13), + SIMDE_FLOAT64_C( 1318.10), SIMDE_FLOAT64_C( 504.00), SIMDE_FLOAT64_C( 79.35), SIMDE_FLOAT64_C( -616.71) } }, + { { SIMDE_FLOAT64_C( -285.39), SIMDE_FLOAT64_C( -121.35), SIMDE_FLOAT64_C( -639.17), SIMDE_FLOAT64_C( -533.10), + SIMDE_FLOAT64_C( -522.48), SIMDE_FLOAT64_C( -529.53), SIMDE_FLOAT64_C( 370.11), SIMDE_FLOAT64_C( -85.21) }, + { SIMDE_FLOAT64_C( -449.14), SIMDE_FLOAT64_C( 960.15), SIMDE_FLOAT64_C( 581.35), SIMDE_FLOAT64_C( -690.10), + SIMDE_FLOAT64_C( 774.47), SIMDE_FLOAT64_C( -433.53), SIMDE_FLOAT64_C( -809.64), SIMDE_FLOAT64_C( 129.52) }, + { SIMDE_FLOAT64_C( 163.75), SIMDE_FLOAT64_C( 838.80), SIMDE_FLOAT64_C( -1220.52), SIMDE_FLOAT64_C( -1223.20), + SIMDE_FLOAT64_C( -1296.95), SIMDE_FLOAT64_C( -963.06), SIMDE_FLOAT64_C( 1179.75), SIMDE_FLOAT64_C( 44.31) } }, + { { SIMDE_FLOAT64_C( -793.85), SIMDE_FLOAT64_C( -422.34), 
SIMDE_FLOAT64_C( 662.80), SIMDE_FLOAT64_C( 623.51), + SIMDE_FLOAT64_C( -892.76), SIMDE_FLOAT64_C( 94.79), SIMDE_FLOAT64_C( -104.21), SIMDE_FLOAT64_C( 751.49) }, + { SIMDE_FLOAT64_C( -946.52), SIMDE_FLOAT64_C( 943.16), SIMDE_FLOAT64_C( 376.33), SIMDE_FLOAT64_C( -847.27), + SIMDE_FLOAT64_C( -82.42), SIMDE_FLOAT64_C( -271.02), SIMDE_FLOAT64_C( -736.27), SIMDE_FLOAT64_C( 632.19) }, + { SIMDE_FLOAT64_C( 152.67), SIMDE_FLOAT64_C( 520.82), SIMDE_FLOAT64_C( 286.47), SIMDE_FLOAT64_C( -223.76), + SIMDE_FLOAT64_C( -810.34), SIMDE_FLOAT64_C( -176.23), SIMDE_FLOAT64_C( 632.06), SIMDE_FLOAT64_C( 1383.68) } }, + { { SIMDE_FLOAT64_C( 607.64), SIMDE_FLOAT64_C( -375.43), SIMDE_FLOAT64_C( -900.91), SIMDE_FLOAT64_C( -914.85), + SIMDE_FLOAT64_C( 95.03), SIMDE_FLOAT64_C( 469.20), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 645.89) }, + { SIMDE_FLOAT64_C( 429.35), SIMDE_FLOAT64_C( -418.71), SIMDE_FLOAT64_C( 955.79), SIMDE_FLOAT64_C( 203.82), + SIMDE_FLOAT64_C( 147.75), SIMDE_FLOAT64_C( -853.85), SIMDE_FLOAT64_C( -666.66), SIMDE_FLOAT64_C( 353.90) }, + { SIMDE_FLOAT64_C( 178.29), SIMDE_FLOAT64_C( -794.14), SIMDE_FLOAT64_C( -1856.70), SIMDE_FLOAT64_C( -711.03), + SIMDE_FLOAT64_C( -52.72), SIMDE_FLOAT64_C( -384.65), SIMDE_FLOAT64_C( 666.60), SIMDE_FLOAT64_C( 999.79) } }, + { { SIMDE_FLOAT64_C( -276.19), SIMDE_FLOAT64_C( 996.14), SIMDE_FLOAT64_C( -22.59), SIMDE_FLOAT64_C( -168.95), + SIMDE_FLOAT64_C( 90.93), SIMDE_FLOAT64_C( 873.21), SIMDE_FLOAT64_C( -417.47), SIMDE_FLOAT64_C( 144.41) }, + { SIMDE_FLOAT64_C( 816.37), SIMDE_FLOAT64_C( 958.87), SIMDE_FLOAT64_C( 297.14), SIMDE_FLOAT64_C( -266.05), + SIMDE_FLOAT64_C( -312.15), SIMDE_FLOAT64_C( 560.87), SIMDE_FLOAT64_C( -633.86), SIMDE_FLOAT64_C( -704.51) }, + { SIMDE_FLOAT64_C( -1092.56), SIMDE_FLOAT64_C( 1955.01), SIMDE_FLOAT64_C( -319.73), SIMDE_FLOAT64_C( -435.00), + SIMDE_FLOAT64_C( 403.08), SIMDE_FLOAT64_C( 1434.08), SIMDE_FLOAT64_C( 216.39), SIMDE_FLOAT64_C( -560.10) } }, + { { SIMDE_FLOAT64_C( -814.56), SIMDE_FLOAT64_C( -534.77), 
SIMDE_FLOAT64_C( -619.36), SIMDE_FLOAT64_C( 280.47), + SIMDE_FLOAT64_C( 934.43), SIMDE_FLOAT64_C( 380.58), SIMDE_FLOAT64_C( -73.64), SIMDE_FLOAT64_C( 363.78) }, + { SIMDE_FLOAT64_C( 961.87), SIMDE_FLOAT64_C( -117.84), SIMDE_FLOAT64_C( -432.40), SIMDE_FLOAT64_C( 109.62), + SIMDE_FLOAT64_C( 28.31), SIMDE_FLOAT64_C( -99.06), SIMDE_FLOAT64_C( -536.48), SIMDE_FLOAT64_C( 752.12) }, + { SIMDE_FLOAT64_C( -1776.43), SIMDE_FLOAT64_C( -652.61), SIMDE_FLOAT64_C( -186.96), SIMDE_FLOAT64_C( 390.09), + SIMDE_FLOAT64_C( 906.12), SIMDE_FLOAT64_C( 281.52), SIMDE_FLOAT64_C( 462.84), SIMDE_FLOAT64_C( 1115.90) } }, + { { SIMDE_FLOAT64_C( -102.92), SIMDE_FLOAT64_C( 440.94), SIMDE_FLOAT64_C( -416.83), SIMDE_FLOAT64_C( 988.01), + SIMDE_FLOAT64_C( 314.14), SIMDE_FLOAT64_C( 165.70), SIMDE_FLOAT64_C( 132.42), SIMDE_FLOAT64_C( 130.51) }, + { SIMDE_FLOAT64_C( 124.57), SIMDE_FLOAT64_C( -570.44), SIMDE_FLOAT64_C( 864.46), SIMDE_FLOAT64_C( 812.42), + SIMDE_FLOAT64_C( 990.44), SIMDE_FLOAT64_C( -769.40), SIMDE_FLOAT64_C( -892.09), SIMDE_FLOAT64_C( -824.12) }, + { SIMDE_FLOAT64_C( -227.49), SIMDE_FLOAT64_C( -129.50), SIMDE_FLOAT64_C( -1281.29), SIMDE_FLOAT64_C( 1800.43), + SIMDE_FLOAT64_C( -676.30), SIMDE_FLOAT64_C( -603.70), SIMDE_FLOAT64_C( 1024.51), SIMDE_FLOAT64_C( -693.61) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); + simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); + simde__m512d r = simde_x_mm512_addsub_pd(a, b); + simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_x_mm512_addsub_ps (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + const simde_float32 a[16]; + const simde_float32 b[16]; + const simde_float32 r[16]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 285.51), SIMDE_FLOAT32_C( -644.97), SIMDE_FLOAT32_C( 473.77), SIMDE_FLOAT32_C( -254.06), + SIMDE_FLOAT32_C( -523.16), SIMDE_FLOAT32_C( 94.25), SIMDE_FLOAT32_C( 
983.12), SIMDE_FLOAT32_C( -934.68), + SIMDE_FLOAT32_C( 386.15), SIMDE_FLOAT32_C( -882.42), SIMDE_FLOAT32_C( 26.14), SIMDE_FLOAT32_C( -130.97), + SIMDE_FLOAT32_C( 328.76), SIMDE_FLOAT32_C( 343.56), SIMDE_FLOAT32_C( 951.06), SIMDE_FLOAT32_C( -834.04) }, + { SIMDE_FLOAT32_C( -839.81), SIMDE_FLOAT32_C( -91.29), SIMDE_FLOAT32_C( -854.46), SIMDE_FLOAT32_C( 943.48), + SIMDE_FLOAT32_C( 753.86), SIMDE_FLOAT32_C( -767.73), SIMDE_FLOAT32_C( 686.92), SIMDE_FLOAT32_C( 711.62), + SIMDE_FLOAT32_C( -781.87), SIMDE_FLOAT32_C( -746.51), SIMDE_FLOAT32_C( 858.43), SIMDE_FLOAT32_C( 598.68), + SIMDE_FLOAT32_C( -183.72), SIMDE_FLOAT32_C( 743.58), SIMDE_FLOAT32_C( 735.93), SIMDE_FLOAT32_C( -898.22) }, + { SIMDE_FLOAT32_C( 1125.32), SIMDE_FLOAT32_C( -736.26), SIMDE_FLOAT32_C( 1328.23), SIMDE_FLOAT32_C( 689.42), + SIMDE_FLOAT32_C( -1277.02), SIMDE_FLOAT32_C( -673.48), SIMDE_FLOAT32_C( 296.20), SIMDE_FLOAT32_C( -223.06), + SIMDE_FLOAT32_C( 1168.02), SIMDE_FLOAT32_C( -1628.93), SIMDE_FLOAT32_C( -832.29), SIMDE_FLOAT32_C( 467.71), + SIMDE_FLOAT32_C( 512.48), SIMDE_FLOAT32_C( 1087.14), SIMDE_FLOAT32_C( 215.13), SIMDE_FLOAT32_C( -1732.26) } }, + { { SIMDE_FLOAT32_C( -901.40), SIMDE_FLOAT32_C( 209.70), SIMDE_FLOAT32_C( -152.28), SIMDE_FLOAT32_C( -424.56), + SIMDE_FLOAT32_C( -696.05), SIMDE_FLOAT32_C( -169.16), SIMDE_FLOAT32_C( -359.23), SIMDE_FLOAT32_C( 690.10), + SIMDE_FLOAT32_C( -51.58), SIMDE_FLOAT32_C( 666.91), SIMDE_FLOAT32_C( -440.87), SIMDE_FLOAT32_C( -722.82), + SIMDE_FLOAT32_C( 10.46), SIMDE_FLOAT32_C( -489.80), SIMDE_FLOAT32_C( -556.86), SIMDE_FLOAT32_C( 170.66) }, + { SIMDE_FLOAT32_C( 418.90), SIMDE_FLOAT32_C( -411.31), SIMDE_FLOAT32_C( 114.14), SIMDE_FLOAT32_C( 172.76), + SIMDE_FLOAT32_C( -179.04), SIMDE_FLOAT32_C( -198.93), SIMDE_FLOAT32_C( -115.62), SIMDE_FLOAT32_C( 39.08), + SIMDE_FLOAT32_C( 54.55), SIMDE_FLOAT32_C( -257.19), SIMDE_FLOAT32_C( -362.24), SIMDE_FLOAT32_C( 870.83), + SIMDE_FLOAT32_C( -513.61), SIMDE_FLOAT32_C( -626.31), SIMDE_FLOAT32_C( 972.62), SIMDE_FLOAT32_C( 
-415.00) }, + { SIMDE_FLOAT32_C( -1320.30), SIMDE_FLOAT32_C( -201.61), SIMDE_FLOAT32_C( -266.42), SIMDE_FLOAT32_C( -251.80), + SIMDE_FLOAT32_C( -517.01), SIMDE_FLOAT32_C( -368.09), SIMDE_FLOAT32_C( -243.61), SIMDE_FLOAT32_C( 729.18), + SIMDE_FLOAT32_C( -106.13), SIMDE_FLOAT32_C( 409.72), SIMDE_FLOAT32_C( -78.63), SIMDE_FLOAT32_C( 148.01), + SIMDE_FLOAT32_C( 524.07), SIMDE_FLOAT32_C( -1116.11), SIMDE_FLOAT32_C( -1529.48), SIMDE_FLOAT32_C( -244.34) } }, + { { SIMDE_FLOAT32_C( 583.39), SIMDE_FLOAT32_C( -179.66), SIMDE_FLOAT32_C( 160.44), SIMDE_FLOAT32_C( 887.33), + SIMDE_FLOAT32_C( 651.18), SIMDE_FLOAT32_C( 801.21), SIMDE_FLOAT32_C( 577.43), SIMDE_FLOAT32_C( -400.40), + SIMDE_FLOAT32_C( 468.11), SIMDE_FLOAT32_C( -863.43), SIMDE_FLOAT32_C( -123.22), SIMDE_FLOAT32_C( -521.42), + SIMDE_FLOAT32_C( -353.24), SIMDE_FLOAT32_C( 319.92), SIMDE_FLOAT32_C( 649.24), SIMDE_FLOAT32_C( -934.33) }, + { SIMDE_FLOAT32_C( 908.61), SIMDE_FLOAT32_C( -236.62), SIMDE_FLOAT32_C( 238.43), SIMDE_FLOAT32_C( -270.44), + SIMDE_FLOAT32_C( 564.44), SIMDE_FLOAT32_C( -877.19), SIMDE_FLOAT32_C( 768.64), SIMDE_FLOAT32_C( -381.00), + SIMDE_FLOAT32_C( -134.37), SIMDE_FLOAT32_C( -593.60), SIMDE_FLOAT32_C( -510.17), SIMDE_FLOAT32_C( 352.02), + SIMDE_FLOAT32_C( -219.91), SIMDE_FLOAT32_C( -537.55), SIMDE_FLOAT32_C( 937.01), SIMDE_FLOAT32_C( -636.53) }, + { SIMDE_FLOAT32_C( -325.22), SIMDE_FLOAT32_C( -416.28), SIMDE_FLOAT32_C( -77.99), SIMDE_FLOAT32_C( 616.89), + SIMDE_FLOAT32_C( 86.74), SIMDE_FLOAT32_C( -75.98), SIMDE_FLOAT32_C( -191.21), SIMDE_FLOAT32_C( -781.40), + SIMDE_FLOAT32_C( 602.48), SIMDE_FLOAT32_C( -1457.03), SIMDE_FLOAT32_C( 386.95), SIMDE_FLOAT32_C( -169.40), + SIMDE_FLOAT32_C( -133.33), SIMDE_FLOAT32_C( -217.63), SIMDE_FLOAT32_C( -287.77), SIMDE_FLOAT32_C( -1570.86) } }, + { { SIMDE_FLOAT32_C( 282.79), SIMDE_FLOAT32_C( 97.45), SIMDE_FLOAT32_C( -749.19), SIMDE_FLOAT32_C( -66.04), + SIMDE_FLOAT32_C( -101.34), SIMDE_FLOAT32_C( 828.24), SIMDE_FLOAT32_C( 533.56), SIMDE_FLOAT32_C( -633.23), + 
SIMDE_FLOAT32_C( 964.81), SIMDE_FLOAT32_C( -589.66), SIMDE_FLOAT32_C( -154.65), SIMDE_FLOAT32_C( -388.43), + SIMDE_FLOAT32_C( 730.27), SIMDE_FLOAT32_C( -505.41), SIMDE_FLOAT32_C( -322.76), SIMDE_FLOAT32_C( 638.87) }, + { SIMDE_FLOAT32_C( 257.97), SIMDE_FLOAT32_C( 915.67), SIMDE_FLOAT32_C( -631.56), SIMDE_FLOAT32_C( -177.59), + SIMDE_FLOAT32_C( -961.51), SIMDE_FLOAT32_C( -862.92), SIMDE_FLOAT32_C( 441.41), SIMDE_FLOAT32_C( -95.89), + SIMDE_FLOAT32_C( -456.52), SIMDE_FLOAT32_C( 931.24), SIMDE_FLOAT32_C( -743.87), SIMDE_FLOAT32_C( 323.57), + SIMDE_FLOAT32_C( -606.31), SIMDE_FLOAT32_C( -806.85), SIMDE_FLOAT32_C( 687.04), SIMDE_FLOAT32_C( 676.48) }, + { SIMDE_FLOAT32_C( 24.82), SIMDE_FLOAT32_C( 1013.12), SIMDE_FLOAT32_C( -117.63), SIMDE_FLOAT32_C( -243.63), + SIMDE_FLOAT32_C( 860.17), SIMDE_FLOAT32_C( -34.68), SIMDE_FLOAT32_C( 92.15), SIMDE_FLOAT32_C( -729.12), + SIMDE_FLOAT32_C( 1421.33), SIMDE_FLOAT32_C( 341.58), SIMDE_FLOAT32_C( 589.22), SIMDE_FLOAT32_C( -64.86), + SIMDE_FLOAT32_C( 1336.58), SIMDE_FLOAT32_C( -1312.26), SIMDE_FLOAT32_C( -1009.80), SIMDE_FLOAT32_C( 1315.35) } }, + { { SIMDE_FLOAT32_C( 290.60), SIMDE_FLOAT32_C( 937.85), SIMDE_FLOAT32_C( -389.56), SIMDE_FLOAT32_C( -810.74), + SIMDE_FLOAT32_C( 766.10), SIMDE_FLOAT32_C( -856.00), SIMDE_FLOAT32_C( -443.97), SIMDE_FLOAT32_C( 730.91), + SIMDE_FLOAT32_C( -445.65), SIMDE_FLOAT32_C( 401.38), SIMDE_FLOAT32_C( -657.52), SIMDE_FLOAT32_C( -715.39), + SIMDE_FLOAT32_C( 895.98), SIMDE_FLOAT32_C( 19.72), SIMDE_FLOAT32_C( 923.49), SIMDE_FLOAT32_C( 153.95) }, + { SIMDE_FLOAT32_C( -64.60), SIMDE_FLOAT32_C( -708.08), SIMDE_FLOAT32_C( 976.36), SIMDE_FLOAT32_C( -26.12), + SIMDE_FLOAT32_C( -571.00), SIMDE_FLOAT32_C( 417.77), SIMDE_FLOAT32_C( 877.99), SIMDE_FLOAT32_C( -27.51), + SIMDE_FLOAT32_C( 349.02), SIMDE_FLOAT32_C( -865.87), SIMDE_FLOAT32_C( -703.94), SIMDE_FLOAT32_C( 742.71), + SIMDE_FLOAT32_C( -672.73), SIMDE_FLOAT32_C( 983.10), SIMDE_FLOAT32_C( 419.19), SIMDE_FLOAT32_C( 617.87) }, + { SIMDE_FLOAT32_C( 355.20), 
SIMDE_FLOAT32_C( 229.77), SIMDE_FLOAT32_C( -1365.92), SIMDE_FLOAT32_C( -836.86), + SIMDE_FLOAT32_C( 1337.10), SIMDE_FLOAT32_C( -438.23), SIMDE_FLOAT32_C( -1321.96), SIMDE_FLOAT32_C( 703.40), + SIMDE_FLOAT32_C( -794.67), SIMDE_FLOAT32_C( -464.49), SIMDE_FLOAT32_C( 46.42), SIMDE_FLOAT32_C( 27.32), + SIMDE_FLOAT32_C( 1568.71), SIMDE_FLOAT32_C( 1002.82), SIMDE_FLOAT32_C( 504.30), SIMDE_FLOAT32_C( 771.82) } }, + { { SIMDE_FLOAT32_C( 920.95), SIMDE_FLOAT32_C( -970.37), SIMDE_FLOAT32_C( 807.13), SIMDE_FLOAT32_C( 687.05), + SIMDE_FLOAT32_C( -826.37), SIMDE_FLOAT32_C( -636.84), SIMDE_FLOAT32_C( 417.96), SIMDE_FLOAT32_C( -272.02), + SIMDE_FLOAT32_C( 764.55), SIMDE_FLOAT32_C( 760.44), SIMDE_FLOAT32_C( 12.59), SIMDE_FLOAT32_C( 660.52), + SIMDE_FLOAT32_C( -219.84), SIMDE_FLOAT32_C( -63.92), SIMDE_FLOAT32_C( -185.53), SIMDE_FLOAT32_C( 715.56) }, + { SIMDE_FLOAT32_C( 228.00), SIMDE_FLOAT32_C( -209.17), SIMDE_FLOAT32_C( -310.56), SIMDE_FLOAT32_C( 657.01), + SIMDE_FLOAT32_C( -791.40), SIMDE_FLOAT32_C( -432.57), SIMDE_FLOAT32_C( -370.50), SIMDE_FLOAT32_C( 557.62), + SIMDE_FLOAT32_C( -298.44), SIMDE_FLOAT32_C( -74.45), SIMDE_FLOAT32_C( 300.33), SIMDE_FLOAT32_C( 28.83), + SIMDE_FLOAT32_C( -91.34), SIMDE_FLOAT32_C( -280.48), SIMDE_FLOAT32_C( -353.30), SIMDE_FLOAT32_C( -170.39) }, + { SIMDE_FLOAT32_C( 692.95), SIMDE_FLOAT32_C( -1179.54), SIMDE_FLOAT32_C( 1117.69), SIMDE_FLOAT32_C( 1344.06), + SIMDE_FLOAT32_C( -34.97), SIMDE_FLOAT32_C( -1069.41), SIMDE_FLOAT32_C( 788.46), SIMDE_FLOAT32_C( 285.60), + SIMDE_FLOAT32_C( 1062.99), SIMDE_FLOAT32_C( 685.99), SIMDE_FLOAT32_C( -287.74), SIMDE_FLOAT32_C( 689.35), + SIMDE_FLOAT32_C( -128.50), SIMDE_FLOAT32_C( -344.40), SIMDE_FLOAT32_C( 167.77), SIMDE_FLOAT32_C( 545.17) } }, + { { SIMDE_FLOAT32_C( -250.86), SIMDE_FLOAT32_C( -546.17), SIMDE_FLOAT32_C( -483.34), SIMDE_FLOAT32_C( -77.23), + SIMDE_FLOAT32_C( -183.01), SIMDE_FLOAT32_C( 934.62), SIMDE_FLOAT32_C( 650.75), SIMDE_FLOAT32_C( -418.46), + SIMDE_FLOAT32_C( 695.06), SIMDE_FLOAT32_C( -336.66), 
SIMDE_FLOAT32_C( -757.94), SIMDE_FLOAT32_C( -524.78), + SIMDE_FLOAT32_C( 599.42), SIMDE_FLOAT32_C( 56.53), SIMDE_FLOAT32_C( -809.22), SIMDE_FLOAT32_C( -172.57) }, + { SIMDE_FLOAT32_C( 847.36), SIMDE_FLOAT32_C( -119.78), SIMDE_FLOAT32_C( -515.56), SIMDE_FLOAT32_C( -944.04), + SIMDE_FLOAT32_C( 447.65), SIMDE_FLOAT32_C( 113.93), SIMDE_FLOAT32_C( 613.58), SIMDE_FLOAT32_C( -850.79), + SIMDE_FLOAT32_C( -960.51), SIMDE_FLOAT32_C( -86.09), SIMDE_FLOAT32_C( 178.04), SIMDE_FLOAT32_C( -51.86), + SIMDE_FLOAT32_C( 633.43), SIMDE_FLOAT32_C( 824.74), SIMDE_FLOAT32_C( 777.76), SIMDE_FLOAT32_C( -617.43) }, + { SIMDE_FLOAT32_C( -1098.22), SIMDE_FLOAT32_C( -665.95), SIMDE_FLOAT32_C( 32.22), SIMDE_FLOAT32_C( -1021.27), + SIMDE_FLOAT32_C( -630.66), SIMDE_FLOAT32_C( 1048.55), SIMDE_FLOAT32_C( 37.17), SIMDE_FLOAT32_C( -1269.25), + SIMDE_FLOAT32_C( 1655.57), SIMDE_FLOAT32_C( -422.75), SIMDE_FLOAT32_C( -935.98), SIMDE_FLOAT32_C( -576.64), + SIMDE_FLOAT32_C( -34.01), SIMDE_FLOAT32_C( 881.27), SIMDE_FLOAT32_C( -1586.98), SIMDE_FLOAT32_C( -790.00) } }, + { { SIMDE_FLOAT32_C( -721.42), SIMDE_FLOAT32_C( -705.58), SIMDE_FLOAT32_C( 305.34), SIMDE_FLOAT32_C( 95.57), + SIMDE_FLOAT32_C( -770.96), SIMDE_FLOAT32_C( -43.91), SIMDE_FLOAT32_C( 677.11), SIMDE_FLOAT32_C( 924.09), + SIMDE_FLOAT32_C( 619.43), SIMDE_FLOAT32_C( 919.17), SIMDE_FLOAT32_C( -600.69), SIMDE_FLOAT32_C( 218.85), + SIMDE_FLOAT32_C( -24.30), SIMDE_FLOAT32_C( -409.91), SIMDE_FLOAT32_C( -953.72), SIMDE_FLOAT32_C( -176.95) }, + { SIMDE_FLOAT32_C( 470.30), SIMDE_FLOAT32_C( -469.29), SIMDE_FLOAT32_C( -120.98), SIMDE_FLOAT32_C( -82.05), + SIMDE_FLOAT32_C( 644.65), SIMDE_FLOAT32_C( -507.40), SIMDE_FLOAT32_C( 67.17), SIMDE_FLOAT32_C( 684.13), + SIMDE_FLOAT32_C( 406.51), SIMDE_FLOAT32_C( -754.79), SIMDE_FLOAT32_C( -367.72), SIMDE_FLOAT32_C( 39.94), + SIMDE_FLOAT32_C( -930.05), SIMDE_FLOAT32_C( -589.97), SIMDE_FLOAT32_C( 422.51), SIMDE_FLOAT32_C( -651.47) }, + { SIMDE_FLOAT32_C( -1191.72), SIMDE_FLOAT32_C( -1174.87), SIMDE_FLOAT32_C( 426.32), 
SIMDE_FLOAT32_C( 13.52), + SIMDE_FLOAT32_C( -1415.61), SIMDE_FLOAT32_C( -551.31), SIMDE_FLOAT32_C( 609.94), SIMDE_FLOAT32_C( 1608.22), + SIMDE_FLOAT32_C( 212.92), SIMDE_FLOAT32_C( 164.38), SIMDE_FLOAT32_C( -232.97), SIMDE_FLOAT32_C( 258.79), + SIMDE_FLOAT32_C( 905.75), SIMDE_FLOAT32_C( -999.88), SIMDE_FLOAT32_C( -1376.23), SIMDE_FLOAT32_C( -828.42) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); + simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); + simde__m512 r = simde_x_mm512_addsub_ps(a, b); + simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_mm512_fmaddsub_pd (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + const simde_float64 a[8]; + const simde_float64 b[8]; + const simde_float64 c[8]; + const simde_float64 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 541.38), SIMDE_FLOAT64_C( 469.79), SIMDE_FLOAT64_C( 553.64), SIMDE_FLOAT64_C( 623.98), + SIMDE_FLOAT64_C( -736.69), SIMDE_FLOAT64_C( 819.10), SIMDE_FLOAT64_C( -628.92), SIMDE_FLOAT64_C( -118.52) }, + { SIMDE_FLOAT64_C( -6.67), SIMDE_FLOAT64_C( -728.20), SIMDE_FLOAT64_C( 147.63), SIMDE_FLOAT64_C( -558.48), + SIMDE_FLOAT64_C( 233.17), SIMDE_FLOAT64_C( 947.66), SIMDE_FLOAT64_C( 742.34), SIMDE_FLOAT64_C( 487.37) }, + { SIMDE_FLOAT64_C( 556.43), SIMDE_FLOAT64_C( 273.11), SIMDE_FLOAT64_C( -452.29), SIMDE_FLOAT64_C( -893.09), + SIMDE_FLOAT64_C( 144.90), SIMDE_FLOAT64_C( -471.58), SIMDE_FLOAT64_C( 601.15), SIMDE_FLOAT64_C( -559.53) }, + { SIMDE_FLOAT64_C( -4167.43), SIMDE_FLOAT64_C(-341827.97), SIMDE_FLOAT64_C( 82186.16), SIMDE_FLOAT64_C(-349373.44), + SIMDE_FLOAT64_C(-171918.91), SIMDE_FLOAT64_C(775756.73), SIMDE_FLOAT64_C(-467473.62), SIMDE_FLOAT64_C(-58322.62) } }, + { { SIMDE_FLOAT64_C( 318.18), SIMDE_FLOAT64_C( -43.15), SIMDE_FLOAT64_C( -636.72), SIMDE_FLOAT64_C( -628.47), + SIMDE_FLOAT64_C( -391.22), SIMDE_FLOAT64_C( -833.32), 
SIMDE_FLOAT64_C( -442.41), SIMDE_FLOAT64_C( -849.84) }, + { SIMDE_FLOAT64_C( 636.48), SIMDE_FLOAT64_C( -888.77), SIMDE_FLOAT64_C( 774.14), SIMDE_FLOAT64_C( 899.78), + SIMDE_FLOAT64_C( 930.34), SIMDE_FLOAT64_C( -854.78), SIMDE_FLOAT64_C( -218.74), SIMDE_FLOAT64_C( -76.33) }, + { SIMDE_FLOAT64_C( -582.98), SIMDE_FLOAT64_C( 928.89), SIMDE_FLOAT64_C( 365.19), SIMDE_FLOAT64_C( 650.19), + SIMDE_FLOAT64_C( 876.55), SIMDE_FLOAT64_C( 107.53), SIMDE_FLOAT64_C( 137.56), SIMDE_FLOAT64_C( 432.98) }, + { SIMDE_FLOAT64_C(203098.19), SIMDE_FLOAT64_C( 39279.32), SIMDE_FLOAT64_C(-493275.61), SIMDE_FLOAT64_C(-564834.55), + SIMDE_FLOAT64_C(-364844.16), SIMDE_FLOAT64_C(712412.80), SIMDE_FLOAT64_C( 96635.20), SIMDE_FLOAT64_C( 65301.27) } }, + { { SIMDE_FLOAT64_C( -619.36), SIMDE_FLOAT64_C( 685.27), SIMDE_FLOAT64_C( 539.89), SIMDE_FLOAT64_C( 525.54), + SIMDE_FLOAT64_C( -786.31), SIMDE_FLOAT64_C( 141.04), SIMDE_FLOAT64_C( 966.01), SIMDE_FLOAT64_C( 531.86) }, + { SIMDE_FLOAT64_C( -902.11), SIMDE_FLOAT64_C( -670.70), SIMDE_FLOAT64_C( 903.40), SIMDE_FLOAT64_C( -293.34), + SIMDE_FLOAT64_C( -504.02), SIMDE_FLOAT64_C( -539.01), SIMDE_FLOAT64_C( -143.18), SIMDE_FLOAT64_C( -867.55) }, + { SIMDE_FLOAT64_C( -427.77), SIMDE_FLOAT64_C( -369.04), SIMDE_FLOAT64_C( -967.76), SIMDE_FLOAT64_C( -497.44), + SIMDE_FLOAT64_C( -223.82), SIMDE_FLOAT64_C( -186.50), SIMDE_FLOAT64_C( 426.23), SIMDE_FLOAT64_C( 193.20) }, + { SIMDE_FLOAT64_C(559158.62), SIMDE_FLOAT64_C(-459979.63), SIMDE_FLOAT64_C(488704.39), SIMDE_FLOAT64_C(-154659.34), + SIMDE_FLOAT64_C(396539.79), SIMDE_FLOAT64_C(-76208.47), SIMDE_FLOAT64_C(-138739.54), SIMDE_FLOAT64_C(-461221.94) } }, + { { SIMDE_FLOAT64_C( -257.62), SIMDE_FLOAT64_C( -208.57), SIMDE_FLOAT64_C( -156.61), SIMDE_FLOAT64_C( -381.07), + SIMDE_FLOAT64_C( 898.96), SIMDE_FLOAT64_C( 980.94), SIMDE_FLOAT64_C( -948.08), SIMDE_FLOAT64_C( -720.39) }, + { SIMDE_FLOAT64_C( 666.21), SIMDE_FLOAT64_C( 591.80), SIMDE_FLOAT64_C( 805.15), SIMDE_FLOAT64_C( 879.90), + SIMDE_FLOAT64_C( -267.16), 
SIMDE_FLOAT64_C( 771.16), SIMDE_FLOAT64_C( 411.76), SIMDE_FLOAT64_C( -169.27) }, + { SIMDE_FLOAT64_C( -899.54), SIMDE_FLOAT64_C( 315.16), SIMDE_FLOAT64_C( 537.39), SIMDE_FLOAT64_C( -403.57), + SIMDE_FLOAT64_C( 776.15), SIMDE_FLOAT64_C( -605.78), SIMDE_FLOAT64_C( -271.12), SIMDE_FLOAT64_C( -651.62) }, + { SIMDE_FLOAT64_C(-170729.48), SIMDE_FLOAT64_C(-123116.57), SIMDE_FLOAT64_C(-126631.93), SIMDE_FLOAT64_C(-335707.06), + SIMDE_FLOAT64_C(-240942.30), SIMDE_FLOAT64_C(755855.91), SIMDE_FLOAT64_C(-390110.30), SIMDE_FLOAT64_C(121288.80) } }, + { { SIMDE_FLOAT64_C( 25.18), SIMDE_FLOAT64_C( -238.88), SIMDE_FLOAT64_C( -149.06), SIMDE_FLOAT64_C( 801.35), + SIMDE_FLOAT64_C( 574.61), SIMDE_FLOAT64_C( -722.83), SIMDE_FLOAT64_C( -5.45), SIMDE_FLOAT64_C( -683.00) }, + { SIMDE_FLOAT64_C( 68.60), SIMDE_FLOAT64_C( 837.94), SIMDE_FLOAT64_C( -64.07), SIMDE_FLOAT64_C( -32.44), + SIMDE_FLOAT64_C( 818.88), SIMDE_FLOAT64_C( -12.15), SIMDE_FLOAT64_C( 247.16), SIMDE_FLOAT64_C( 485.09) }, + { SIMDE_FLOAT64_C( -420.35), SIMDE_FLOAT64_C( 52.31), SIMDE_FLOAT64_C( 364.99), SIMDE_FLOAT64_C( 312.49), + SIMDE_FLOAT64_C( -176.53), SIMDE_FLOAT64_C( -223.25), SIMDE_FLOAT64_C( -856.78), SIMDE_FLOAT64_C( -76.07) }, + { SIMDE_FLOAT64_C( 2147.70), SIMDE_FLOAT64_C(-200114.80), SIMDE_FLOAT64_C( 9185.28), SIMDE_FLOAT64_C(-25683.30), + SIMDE_FLOAT64_C(470713.17), SIMDE_FLOAT64_C( 8559.13), SIMDE_FLOAT64_C( -490.24), SIMDE_FLOAT64_C(-331392.54) } }, + { { SIMDE_FLOAT64_C( -908.09), SIMDE_FLOAT64_C( 680.61), SIMDE_FLOAT64_C( 520.36), SIMDE_FLOAT64_C( 868.07), + SIMDE_FLOAT64_C( -925.17), SIMDE_FLOAT64_C( -750.76), SIMDE_FLOAT64_C( -783.55), SIMDE_FLOAT64_C( 100.00) }, + { SIMDE_FLOAT64_C( 10.36), SIMDE_FLOAT64_C( 67.39), SIMDE_FLOAT64_C( -98.64), SIMDE_FLOAT64_C( -415.02), + SIMDE_FLOAT64_C( 344.56), SIMDE_FLOAT64_C( 895.91), SIMDE_FLOAT64_C( -98.03), SIMDE_FLOAT64_C( -586.84) }, + { SIMDE_FLOAT64_C( 733.85), SIMDE_FLOAT64_C( 837.90), SIMDE_FLOAT64_C( 380.71), SIMDE_FLOAT64_C( 552.74), + SIMDE_FLOAT64_C( 
-174.25), SIMDE_FLOAT64_C( -372.12), SIMDE_FLOAT64_C( 37.83), SIMDE_FLOAT64_C( 405.40) }, + { SIMDE_FLOAT64_C(-10141.66), SIMDE_FLOAT64_C( 46704.21), SIMDE_FLOAT64_C(-51709.02), SIMDE_FLOAT64_C(-359713.67), + SIMDE_FLOAT64_C(-318602.33), SIMDE_FLOAT64_C(-672985.51), SIMDE_FLOAT64_C( 76773.58), SIMDE_FLOAT64_C(-58278.60) } }, + { { SIMDE_FLOAT64_C( 680.19), SIMDE_FLOAT64_C( -597.18), SIMDE_FLOAT64_C( -282.11), SIMDE_FLOAT64_C( -496.33), + SIMDE_FLOAT64_C( 179.58), SIMDE_FLOAT64_C( -138.89), SIMDE_FLOAT64_C( 427.60), SIMDE_FLOAT64_C( 271.49) }, + { SIMDE_FLOAT64_C( -458.28), SIMDE_FLOAT64_C( -52.04), SIMDE_FLOAT64_C( 139.56), SIMDE_FLOAT64_C( -383.45), + SIMDE_FLOAT64_C( 197.20), SIMDE_FLOAT64_C( 356.00), SIMDE_FLOAT64_C( 716.55), SIMDE_FLOAT64_C( -792.44) }, + { SIMDE_FLOAT64_C( -576.61), SIMDE_FLOAT64_C( -382.09), SIMDE_FLOAT64_C( -207.46), SIMDE_FLOAT64_C( 767.95), + SIMDE_FLOAT64_C( -486.18), SIMDE_FLOAT64_C( 694.51), SIMDE_FLOAT64_C( -818.90), SIMDE_FLOAT64_C( -752.33) }, + { SIMDE_FLOAT64_C(-311140.86), SIMDE_FLOAT64_C( 30695.16), SIMDE_FLOAT64_C(-39163.81), SIMDE_FLOAT64_C(191085.69), + SIMDE_FLOAT64_C( 35899.36), SIMDE_FLOAT64_C(-48750.33), SIMDE_FLOAT64_C(307215.68), SIMDE_FLOAT64_C(-215891.87) } }, + { { SIMDE_FLOAT64_C( 532.42), SIMDE_FLOAT64_C( 561.82), SIMDE_FLOAT64_C( 800.41), SIMDE_FLOAT64_C( -641.84), + SIMDE_FLOAT64_C( -810.31), SIMDE_FLOAT64_C( -161.76), SIMDE_FLOAT64_C( 763.56), SIMDE_FLOAT64_C( 869.89) }, + { SIMDE_FLOAT64_C( 241.06), SIMDE_FLOAT64_C( -518.54), SIMDE_FLOAT64_C( -626.45), SIMDE_FLOAT64_C( -579.36), + SIMDE_FLOAT64_C( 342.57), SIMDE_FLOAT64_C( 801.15), SIMDE_FLOAT64_C( 692.13), SIMDE_FLOAT64_C( 884.29) }, + { SIMDE_FLOAT64_C( -250.90), SIMDE_FLOAT64_C( -168.31), SIMDE_FLOAT64_C( -499.17), SIMDE_FLOAT64_C( 946.31), + SIMDE_FLOAT64_C( -812.31), SIMDE_FLOAT64_C( -782.61), SIMDE_FLOAT64_C( -846.13), SIMDE_FLOAT64_C( -388.92) }, + { SIMDE_FLOAT64_C(128596.07), SIMDE_FLOAT64_C(-291494.45), SIMDE_FLOAT64_C(-500917.67), 
SIMDE_FLOAT64_C(372802.73), + SIMDE_FLOAT64_C(-276775.59), SIMDE_FLOAT64_C(-130376.63), SIMDE_FLOAT64_C(529328.91), SIMDE_FLOAT64_C(768846.11) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); + simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); + simde__m512d c = simde_mm512_loadu_pd(test_vec[i].c); + simde__m512d r = simde_mm512_fmaddsub_pd(a, b, c); + simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); + } + + return 0; +} + +static int +test_simde_mm512_fmaddsub_ps (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + const simde_float32 a[16]; + const simde_float32 b[16]; + const simde_float32 c[16]; + const simde_float32 r[16]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -860.74), SIMDE_FLOAT32_C( 820.16), SIMDE_FLOAT32_C( -773.27), SIMDE_FLOAT32_C( 132.91), + SIMDE_FLOAT32_C( -777.06), SIMDE_FLOAT32_C( 537.91), SIMDE_FLOAT32_C( 228.25), SIMDE_FLOAT32_C( -722.75), + SIMDE_FLOAT32_C( -361.04), SIMDE_FLOAT32_C( 719.15), SIMDE_FLOAT32_C( -853.74), SIMDE_FLOAT32_C( 830.98), + SIMDE_FLOAT32_C( 724.07), SIMDE_FLOAT32_C( 191.76), SIMDE_FLOAT32_C( 502.28), SIMDE_FLOAT32_C( -685.49) }, + { SIMDE_FLOAT32_C( 924.70), SIMDE_FLOAT32_C( -30.28), SIMDE_FLOAT32_C( -911.52), SIMDE_FLOAT32_C( 116.58), + SIMDE_FLOAT32_C( 659.39), SIMDE_FLOAT32_C( 219.58), SIMDE_FLOAT32_C( -160.50), SIMDE_FLOAT32_C( -515.77), + SIMDE_FLOAT32_C( -14.24), SIMDE_FLOAT32_C( -884.00), SIMDE_FLOAT32_C( 539.41), SIMDE_FLOAT32_C( 599.25), + SIMDE_FLOAT32_C( -704.27), SIMDE_FLOAT32_C( 35.25), SIMDE_FLOAT32_C( 663.34), SIMDE_FLOAT32_C( -565.01) }, + { SIMDE_FLOAT32_C( -144.59), SIMDE_FLOAT32_C( 890.08), SIMDE_FLOAT32_C( 567.90), SIMDE_FLOAT32_C( 78.35), + SIMDE_FLOAT32_C( 427.99), SIMDE_FLOAT32_C( -203.85), SIMDE_FLOAT32_C( 355.60), SIMDE_FLOAT32_C( -933.05), + SIMDE_FLOAT32_C( -484.71), SIMDE_FLOAT32_C( 501.87), SIMDE_FLOAT32_C( 897.94), SIMDE_FLOAT32_C( -760.64), + SIMDE_FLOAT32_C( 
-306.37), SIMDE_FLOAT32_C( 400.22), SIMDE_FLOAT32_C( -446.13), SIMDE_FLOAT32_C( -381.67) }, + { SIMDE_FLOAT32_C(-795781.69), SIMDE_FLOAT32_C(-23944.37), SIMDE_FLOAT32_C(704283.25), SIMDE_FLOAT32_C( 15573.00), + SIMDE_FLOAT32_C(-512813.59), SIMDE_FLOAT32_C(117910.42), SIMDE_FLOAT32_C(-36989.73), SIMDE_FLOAT32_C(371839.72), + SIMDE_FLOAT32_C( 5625.92), SIMDE_FLOAT32_C(-635226.75), SIMDE_FLOAT32_C(-461413.81), SIMDE_FLOAT32_C(497204.12), + SIMDE_FLOAT32_C(-509634.44), SIMDE_FLOAT32_C( 7159.76), SIMDE_FLOAT32_C(333628.56), SIMDE_FLOAT32_C(386927.06) } }, + { { SIMDE_FLOAT32_C( -630.06), SIMDE_FLOAT32_C( -357.65), SIMDE_FLOAT32_C( 734.90), SIMDE_FLOAT32_C( -970.67), + SIMDE_FLOAT32_C( 861.93), SIMDE_FLOAT32_C( -425.60), SIMDE_FLOAT32_C( -486.44), SIMDE_FLOAT32_C( -152.31), + SIMDE_FLOAT32_C( -309.60), SIMDE_FLOAT32_C( -947.03), SIMDE_FLOAT32_C( -553.07), SIMDE_FLOAT32_C( -13.87), + SIMDE_FLOAT32_C( 88.22), SIMDE_FLOAT32_C( -889.72), SIMDE_FLOAT32_C( 421.11), SIMDE_FLOAT32_C( 943.63) }, + { SIMDE_FLOAT32_C( -999.65), SIMDE_FLOAT32_C( -10.99), SIMDE_FLOAT32_C( 21.98), SIMDE_FLOAT32_C( 428.34), + SIMDE_FLOAT32_C( 785.16), SIMDE_FLOAT32_C( -622.42), SIMDE_FLOAT32_C( 495.30), SIMDE_FLOAT32_C( -699.55), + SIMDE_FLOAT32_C( 879.44), SIMDE_FLOAT32_C( 393.23), SIMDE_FLOAT32_C( -460.19), SIMDE_FLOAT32_C( -426.93), + SIMDE_FLOAT32_C( -206.55), SIMDE_FLOAT32_C( 93.68), SIMDE_FLOAT32_C( 191.40), SIMDE_FLOAT32_C( 163.39) }, + { SIMDE_FLOAT32_C( 736.02), SIMDE_FLOAT32_C( -73.69), SIMDE_FLOAT32_C( 192.72), SIMDE_FLOAT32_C( 597.95), + SIMDE_FLOAT32_C( 500.71), SIMDE_FLOAT32_C( 706.28), SIMDE_FLOAT32_C( -554.36), SIMDE_FLOAT32_C( -808.89), + SIMDE_FLOAT32_C( 759.24), SIMDE_FLOAT32_C( -107.42), SIMDE_FLOAT32_C( 177.24), SIMDE_FLOAT32_C( -152.54), + SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( -401.65), SIMDE_FLOAT32_C( -208.91), SIMDE_FLOAT32_C( 3.21) }, + { SIMDE_FLOAT32_C(629103.50), SIMDE_FLOAT32_C( 3856.88), SIMDE_FLOAT32_C( 15960.38), SIMDE_FLOAT32_C(-415178.84), + 
SIMDE_FLOAT32_C(676252.25), SIMDE_FLOAT32_C(265608.22), SIMDE_FLOAT32_C(-240379.38), SIMDE_FLOAT32_C(105739.56), + SIMDE_FLOAT32_C(-273033.88), SIMDE_FLOAT32_C(-372508.03), SIMDE_FLOAT32_C(254340.05), SIMDE_FLOAT32_C( 5768.98), + SIMDE_FLOAT32_C(-18224.69), SIMDE_FLOAT32_C(-83750.62), SIMDE_FLOAT32_C( 80809.35), SIMDE_FLOAT32_C(154182.91) } }, + { { SIMDE_FLOAT32_C( 587.36), SIMDE_FLOAT32_C( 813.07), SIMDE_FLOAT32_C( -568.45), SIMDE_FLOAT32_C( 372.52), + SIMDE_FLOAT32_C( -809.36), SIMDE_FLOAT32_C( 926.85), SIMDE_FLOAT32_C( 672.97), SIMDE_FLOAT32_C( -929.91), + SIMDE_FLOAT32_C( 320.08), SIMDE_FLOAT32_C( -787.22), SIMDE_FLOAT32_C( -356.84), SIMDE_FLOAT32_C( -886.47), + SIMDE_FLOAT32_C( 306.46), SIMDE_FLOAT32_C( 834.57), SIMDE_FLOAT32_C( 276.92), SIMDE_FLOAT32_C( 42.49) }, + { SIMDE_FLOAT32_C( -239.13), SIMDE_FLOAT32_C( -530.36), SIMDE_FLOAT32_C( -359.56), SIMDE_FLOAT32_C( -738.42), + SIMDE_FLOAT32_C( -824.08), SIMDE_FLOAT32_C( 86.08), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 935.16), + SIMDE_FLOAT32_C( 978.66), SIMDE_FLOAT32_C( 629.92), SIMDE_FLOAT32_C( -217.38), SIMDE_FLOAT32_C( -18.49), + SIMDE_FLOAT32_C( -771.73), SIMDE_FLOAT32_C( 573.71), SIMDE_FLOAT32_C( 984.72), SIMDE_FLOAT32_C( 815.63) }, + { SIMDE_FLOAT32_C( 386.78), SIMDE_FLOAT32_C( -583.73), SIMDE_FLOAT32_C( 188.15), SIMDE_FLOAT32_C( 577.42), + SIMDE_FLOAT32_C( -656.88), SIMDE_FLOAT32_C( -138.88), SIMDE_FLOAT32_C( 647.51), SIMDE_FLOAT32_C( 663.20), + SIMDE_FLOAT32_C( 73.90), SIMDE_FLOAT32_C( -709.33), SIMDE_FLOAT32_C( 776.73), SIMDE_FLOAT32_C( -619.64), + SIMDE_FLOAT32_C( -874.76), SIMDE_FLOAT32_C( 53.64), SIMDE_FLOAT32_C( 422.85), SIMDE_FLOAT32_C( -113.89) }, + { SIMDE_FLOAT32_C(-140842.17), SIMDE_FLOAT32_C(-431803.53), SIMDE_FLOAT32_C(204203.73), SIMDE_FLOAT32_C(-274498.81), + SIMDE_FLOAT32_C(667634.25), SIMDE_FLOAT32_C( 79644.37), SIMDE_FLOAT32_C(-368970.69), SIMDE_FLOAT32_C(-868951.38), + SIMDE_FLOAT32_C(313175.56), SIMDE_FLOAT32_C(-496594.94), SIMDE_FLOAT32_C( 76793.16), SIMDE_FLOAT32_C( 15771.19), 
+ SIMDE_FLOAT32_C(-235629.59), SIMDE_FLOAT32_C(478854.81), SIMDE_FLOAT32_C(272265.81), SIMDE_FLOAT32_C( 34542.23) } }, + { { SIMDE_FLOAT32_C( 523.28), SIMDE_FLOAT32_C( -936.71), SIMDE_FLOAT32_C( 147.69), SIMDE_FLOAT32_C( 699.20), + SIMDE_FLOAT32_C( 149.37), SIMDE_FLOAT32_C( 600.38), SIMDE_FLOAT32_C( 634.36), SIMDE_FLOAT32_C( 128.03), + SIMDE_FLOAT32_C( 230.30), SIMDE_FLOAT32_C( -583.02), SIMDE_FLOAT32_C( -890.46), SIMDE_FLOAT32_C( 458.58), + SIMDE_FLOAT32_C( 990.69), SIMDE_FLOAT32_C( -905.74), SIMDE_FLOAT32_C( 274.21), SIMDE_FLOAT32_C( 377.47) }, + { SIMDE_FLOAT32_C( -489.47), SIMDE_FLOAT32_C( -537.64), SIMDE_FLOAT32_C( -45.11), SIMDE_FLOAT32_C( -146.35), + SIMDE_FLOAT32_C( 323.48), SIMDE_FLOAT32_C( -397.60), SIMDE_FLOAT32_C( -483.16), SIMDE_FLOAT32_C( -602.61), + SIMDE_FLOAT32_C( -106.92), SIMDE_FLOAT32_C( -706.43), SIMDE_FLOAT32_C( -222.25), SIMDE_FLOAT32_C( 18.32), + SIMDE_FLOAT32_C( 347.21), SIMDE_FLOAT32_C( -799.40), SIMDE_FLOAT32_C( 904.43), SIMDE_FLOAT32_C( -129.50) }, + { SIMDE_FLOAT32_C( -736.11), SIMDE_FLOAT32_C( 52.12), SIMDE_FLOAT32_C( -430.30), SIMDE_FLOAT32_C( 413.26), + SIMDE_FLOAT32_C( -347.50), SIMDE_FLOAT32_C( -795.94), SIMDE_FLOAT32_C( -458.71), SIMDE_FLOAT32_C( 882.81), + SIMDE_FLOAT32_C( -378.96), SIMDE_FLOAT32_C( -349.17), SIMDE_FLOAT32_C( 341.39), SIMDE_FLOAT32_C( -388.26), + SIMDE_FLOAT32_C( -254.91), SIMDE_FLOAT32_C( -384.40), SIMDE_FLOAT32_C( 989.21), SIMDE_FLOAT32_C( 255.61) }, + { SIMDE_FLOAT32_C(-255393.77), SIMDE_FLOAT32_C(503664.91), SIMDE_FLOAT32_C( -6232.00), SIMDE_FLOAT32_C(-101914.67), + SIMDE_FLOAT32_C( 48665.71), SIMDE_FLOAT32_C(-239507.03), SIMDE_FLOAT32_C(-306038.66), SIMDE_FLOAT32_C(-76269.34), + SIMDE_FLOAT32_C(-24244.71), SIMDE_FLOAT32_C(411513.69), SIMDE_FLOAT32_C(197563.34), SIMDE_FLOAT32_C( 8012.93), + SIMDE_FLOAT32_C(344232.38), SIMDE_FLOAT32_C(723664.19), SIMDE_FLOAT32_C(247014.53), SIMDE_FLOAT32_C(-48626.75) } }, + { { SIMDE_FLOAT32_C( 77.96), SIMDE_FLOAT32_C( -55.90), SIMDE_FLOAT32_C( -890.74), SIMDE_FLOAT32_C( 
-598.56), + SIMDE_FLOAT32_C( 546.50), SIMDE_FLOAT32_C( -373.90), SIMDE_FLOAT32_C( -201.17), SIMDE_FLOAT32_C( -560.42), + SIMDE_FLOAT32_C( -80.33), SIMDE_FLOAT32_C( 576.58), SIMDE_FLOAT32_C( 457.90), SIMDE_FLOAT32_C( -733.12), + SIMDE_FLOAT32_C( 777.18), SIMDE_FLOAT32_C( 362.33), SIMDE_FLOAT32_C( 137.38), SIMDE_FLOAT32_C( -958.92) }, + { SIMDE_FLOAT32_C( -585.55), SIMDE_FLOAT32_C( 707.08), SIMDE_FLOAT32_C( 454.34), SIMDE_FLOAT32_C( 66.96), + SIMDE_FLOAT32_C( 911.14), SIMDE_FLOAT32_C( 995.62), SIMDE_FLOAT32_C( -50.24), SIMDE_FLOAT32_C( -467.82), + SIMDE_FLOAT32_C( -353.55), SIMDE_FLOAT32_C( -708.85), SIMDE_FLOAT32_C( 143.92), SIMDE_FLOAT32_C( 391.53), + SIMDE_FLOAT32_C( -93.25), SIMDE_FLOAT32_C( 133.13), SIMDE_FLOAT32_C( -352.85), SIMDE_FLOAT32_C( 984.70) }, + { SIMDE_FLOAT32_C( -922.77), SIMDE_FLOAT32_C( -243.59), SIMDE_FLOAT32_C( -613.86), SIMDE_FLOAT32_C( 623.73), + SIMDE_FLOAT32_C( 382.51), SIMDE_FLOAT32_C( 184.97), SIMDE_FLOAT32_C( -936.69), SIMDE_FLOAT32_C( -697.82), + SIMDE_FLOAT32_C( -238.45), SIMDE_FLOAT32_C( 521.20), SIMDE_FLOAT32_C( -430.93), SIMDE_FLOAT32_C( -461.26), + SIMDE_FLOAT32_C( -116.47), SIMDE_FLOAT32_C( 706.45), SIMDE_FLOAT32_C( -420.19), SIMDE_FLOAT32_C( 297.98) }, + { SIMDE_FLOAT32_C(-44726.71), SIMDE_FLOAT32_C(-39769.36), SIMDE_FLOAT32_C(-404084.94), SIMDE_FLOAT32_C(-39455.85), + SIMDE_FLOAT32_C(497555.53), SIMDE_FLOAT32_C(-372077.34), SIMDE_FLOAT32_C( 11043.47), SIMDE_FLOAT32_C(261477.88), + SIMDE_FLOAT32_C( 28639.12), SIMDE_FLOAT32_C(-408187.53), SIMDE_FLOAT32_C( 66331.90), SIMDE_FLOAT32_C(-287499.72), + SIMDE_FLOAT32_C(-72355.56), SIMDE_FLOAT32_C( 48943.44), SIMDE_FLOAT32_C(-48054.34), SIMDE_FLOAT32_C(-943950.50) } }, + { { SIMDE_FLOAT32_C( 413.52), SIMDE_FLOAT32_C( -965.85), SIMDE_FLOAT32_C( -635.06), SIMDE_FLOAT32_C( 324.66), + SIMDE_FLOAT32_C( -970.23), SIMDE_FLOAT32_C( 314.71), SIMDE_FLOAT32_C( 856.85), SIMDE_FLOAT32_C( -323.78), + SIMDE_FLOAT32_C( 605.86), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -932.24), SIMDE_FLOAT32_C( -487.40), + 
SIMDE_FLOAT32_C( -866.11), SIMDE_FLOAT32_C( -285.09), SIMDE_FLOAT32_C( -502.70), SIMDE_FLOAT32_C( -788.88) }, + { SIMDE_FLOAT32_C( 471.32), SIMDE_FLOAT32_C( -116.55), SIMDE_FLOAT32_C( 834.85), SIMDE_FLOAT32_C( -146.17), + SIMDE_FLOAT32_C( -931.58), SIMDE_FLOAT32_C( 898.16), SIMDE_FLOAT32_C( 156.01), SIMDE_FLOAT32_C( -170.03), + SIMDE_FLOAT32_C( 419.36), SIMDE_FLOAT32_C( 725.08), SIMDE_FLOAT32_C( 368.71), SIMDE_FLOAT32_C( -697.11), + SIMDE_FLOAT32_C( 431.52), SIMDE_FLOAT32_C( 948.53), SIMDE_FLOAT32_C( 600.88), SIMDE_FLOAT32_C( -154.95) }, + { SIMDE_FLOAT32_C( 982.68), SIMDE_FLOAT32_C( 965.82), SIMDE_FLOAT32_C( -830.29), SIMDE_FLOAT32_C( -987.55), + SIMDE_FLOAT32_C( 280.53), SIMDE_FLOAT32_C( -973.44), SIMDE_FLOAT32_C( -311.33), SIMDE_FLOAT32_C( -113.62), + SIMDE_FLOAT32_C( 27.32), SIMDE_FLOAT32_C( -243.57), SIMDE_FLOAT32_C( 398.98), SIMDE_FLOAT32_C( 161.21), + SIMDE_FLOAT32_C( 471.34), SIMDE_FLOAT32_C( 896.29), SIMDE_FLOAT32_C( 372.34), SIMDE_FLOAT32_C( -57.35) }, + { SIMDE_FLOAT32_C(193917.56), SIMDE_FLOAT32_C(113535.64), SIMDE_FLOAT32_C(-529349.50), SIMDE_FLOAT32_C(-48443.10), + SIMDE_FLOAT32_C(903566.38), SIMDE_FLOAT32_C(281686.47), SIMDE_FLOAT32_C(133988.48), SIMDE_FLOAT32_C( 54938.69), + SIMDE_FLOAT32_C(254046.12), SIMDE_FLOAT32_C( 307.49), SIMDE_FLOAT32_C(-344125.16), SIMDE_FLOAT32_C(339932.62), + SIMDE_FLOAT32_C(-374215.12), SIMDE_FLOAT32_C(-269520.16), SIMDE_FLOAT32_C(-302434.72), SIMDE_FLOAT32_C(122179.60) } }, + { { SIMDE_FLOAT32_C( 56.13), SIMDE_FLOAT32_C( 234.39), SIMDE_FLOAT32_C( 355.70), SIMDE_FLOAT32_C( 681.34), + SIMDE_FLOAT32_C( 414.66), SIMDE_FLOAT32_C( 322.03), SIMDE_FLOAT32_C( 425.99), SIMDE_FLOAT32_C( 289.46), + SIMDE_FLOAT32_C( -150.11), SIMDE_FLOAT32_C( -418.13), SIMDE_FLOAT32_C( -147.63), SIMDE_FLOAT32_C( -24.52), + SIMDE_FLOAT32_C( 576.43), SIMDE_FLOAT32_C( -309.62), SIMDE_FLOAT32_C( 977.71), SIMDE_FLOAT32_C( 402.97) }, + { SIMDE_FLOAT32_C( 354.53), SIMDE_FLOAT32_C( -591.17), SIMDE_FLOAT32_C( 937.45), SIMDE_FLOAT32_C( -426.44), + 
SIMDE_FLOAT32_C( 320.01), SIMDE_FLOAT32_C( -774.20), SIMDE_FLOAT32_C( -583.11), SIMDE_FLOAT32_C( 563.73), + SIMDE_FLOAT32_C( 447.20), SIMDE_FLOAT32_C( -666.65), SIMDE_FLOAT32_C( -85.29), SIMDE_FLOAT32_C( 645.89), + SIMDE_FLOAT32_C( -95.73), SIMDE_FLOAT32_C( -987.16), SIMDE_FLOAT32_C( 776.41), SIMDE_FLOAT32_C( 960.40) }, + { SIMDE_FLOAT32_C( 247.23), SIMDE_FLOAT32_C( 132.11), SIMDE_FLOAT32_C( 641.74), SIMDE_FLOAT32_C( -338.12), + SIMDE_FLOAT32_C( -545.85), SIMDE_FLOAT32_C( 67.73), SIMDE_FLOAT32_C( 951.35), SIMDE_FLOAT32_C( 304.04), + SIMDE_FLOAT32_C( 649.60), SIMDE_FLOAT32_C( -196.28), SIMDE_FLOAT32_C( -720.48), SIMDE_FLOAT32_C( 226.04), + SIMDE_FLOAT32_C( 494.09), SIMDE_FLOAT32_C( -742.76), SIMDE_FLOAT32_C( -370.99), SIMDE_FLOAT32_C( -151.37) }, + { SIMDE_FLOAT32_C( 19652.54), SIMDE_FLOAT32_C(-138432.22), SIMDE_FLOAT32_C(332809.22), SIMDE_FLOAT32_C(-290888.78), + SIMDE_FLOAT32_C(133241.20), SIMDE_FLOAT32_C(-249247.89), SIMDE_FLOAT32_C(-249350.36), SIMDE_FLOAT32_C(163481.33), + SIMDE_FLOAT32_C(-67778.80), SIMDE_FLOAT32_C(278550.09), SIMDE_FLOAT32_C( 13311.84), SIMDE_FLOAT32_C(-15611.18), + SIMDE_FLOAT32_C(-55675.73), SIMDE_FLOAT32_C(304901.72), SIMDE_FLOAT32_C(759474.81), SIMDE_FLOAT32_C(386861.03) } }, + { { SIMDE_FLOAT32_C( -333.93), SIMDE_FLOAT32_C( -433.54), SIMDE_FLOAT32_C( 422.18), SIMDE_FLOAT32_C( 986.08), + SIMDE_FLOAT32_C( -207.74), SIMDE_FLOAT32_C( 839.07), SIMDE_FLOAT32_C( 549.80), SIMDE_FLOAT32_C( -760.55), + SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -535.49), SIMDE_FLOAT32_C( 885.34), SIMDE_FLOAT32_C( 76.68), + SIMDE_FLOAT32_C( -522.64), SIMDE_FLOAT32_C( 661.76), SIMDE_FLOAT32_C( 37.08), SIMDE_FLOAT32_C( 724.58) }, + { SIMDE_FLOAT32_C( -206.13), SIMDE_FLOAT32_C( -321.19), SIMDE_FLOAT32_C( -613.53), SIMDE_FLOAT32_C( 248.02), + SIMDE_FLOAT32_C( 746.54), SIMDE_FLOAT32_C( -662.19), SIMDE_FLOAT32_C( -447.94), SIMDE_FLOAT32_C( 396.15), + SIMDE_FLOAT32_C( 141.53), SIMDE_FLOAT32_C( -168.42), SIMDE_FLOAT32_C( -377.82), SIMDE_FLOAT32_C( -364.38), + 
SIMDE_FLOAT32_C( 88.82), SIMDE_FLOAT32_C( 251.19), SIMDE_FLOAT32_C( 484.25), SIMDE_FLOAT32_C( 754.89) }, + { SIMDE_FLOAT32_C( 817.66), SIMDE_FLOAT32_C( -93.57), SIMDE_FLOAT32_C( 740.96), SIMDE_FLOAT32_C( -390.09), + SIMDE_FLOAT32_C( -254.50), SIMDE_FLOAT32_C( 290.76), SIMDE_FLOAT32_C( -150.63), SIMDE_FLOAT32_C( -82.08), + SIMDE_FLOAT32_C( 755.28), SIMDE_FLOAT32_C( -265.29), SIMDE_FLOAT32_C( 994.60), SIMDE_FLOAT32_C( -767.37), + SIMDE_FLOAT32_C( -603.53), SIMDE_FLOAT32_C( 31.68), SIMDE_FLOAT32_C( 957.22), SIMDE_FLOAT32_C( 190.34) }, + { SIMDE_FLOAT32_C( 68015.34), SIMDE_FLOAT32_C(139155.16), SIMDE_FLOAT32_C(-259761.06), SIMDE_FLOAT32_C(244177.47), + SIMDE_FLOAT32_C(-154831.72), SIMDE_FLOAT32_C(-555333.00), SIMDE_FLOAT32_C(-246126.78), SIMDE_FLOAT32_C(-301373.97), + SIMDE_FLOAT32_C(-117884.09), SIMDE_FLOAT32_C( 89921.94), SIMDE_FLOAT32_C(-335493.78), SIMDE_FLOAT32_C(-28708.03), + SIMDE_FLOAT32_C(-45817.36), SIMDE_FLOAT32_C(166259.19), SIMDE_FLOAT32_C( 16998.77), SIMDE_FLOAT32_C(547168.56) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); + simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); + simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); + simde__m512 r = simde_mm512_fmaddsub_ps(a, b, c); + simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN + SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_addsub_pd) + SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_addsub_ps) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fmaddsub_pd) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fmaddsub_ps) +SIMDE_TEST_FUNC_LIST_END + +#include