From 826da41c8cb27bd9c2715c989cfea9a3b8136a6b Mon Sep 17 00:00:00 2001
From: "Michael R. Crusoe"
Date: Tue, 21 Nov 2023 13:52:07 +0100
Subject: [PATCH] wasm relaxed: add f{32x4,64x2}_relaxed_{min,max}

Add simde_wasm_f32x4_relaxed_{min,max} and
simde_wasm_f64x2_relaxed_{min,max}.  Each function forwards to the
native relaxed-SIMD intrinsic when SIMDE_WASM_RELAXED_SIMD_NATIVE is
defined, otherwise to a single SSE/SSE2, NEON, AltiVec (f32x4 only) or
MSA min/max instruction, and finally falls back to the existing
simde_wasm_{f32x4,f64x2}_{min,max} of the same lane type.

While here:
 * pass the accumulator `c` as the first argument of vfmaq_f32,
   vfmsq_f32 and vfmsq_f64 in the relaxed madd/nmadd NEON paths, as the
   neighbouring vmlaq_f32/vmlsq_f32 calls already do;
 * generate simde_v128_{to,from}_m128 whenever SSE is available (the
   __m128i/__m128d conversions still require SSE2) and let the
   _mm_max_ps path of simde_wasm_f32x4_pmax use plain SSE as well.

---
 simde/wasm/relaxed-simd.h          |  88 ++++++++++++++++-
 simde/wasm/simd128.h               |   6 +-
 test/wasm/relaxed-simd/max.c       | 146 +++++++++++++++++++++++++++++
 test/wasm/relaxed-simd/meson.build |   2 +
 test/wasm/relaxed-simd/min.c       | 146 +++++++++++++++++++++++++++++
 5 files changed, 383 insertions(+), 5 deletions(-)
 create mode 100644 test/wasm/relaxed-simd/max.c
 create mode 100644 test/wasm/relaxed-simd/min.c

diff --git a/simde/wasm/relaxed-simd.h b/simde/wasm/relaxed-simd.h
index 09077b269..3ef4a110c 100644
--- a/simde/wasm/relaxed-simd.h
+++ b/simde/wasm/relaxed-simd.h
@@ -378,7 +378,7 @@ simde_wasm_f32x4_relaxed_madd (simde_v128_t a, simde_v128_t b, simde_v128_t c) {
     #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
       r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32);
     #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
-      r_.neon_f32 = vfmaq_f32(a_.neon_f32, b_.neon_f32, c_.neon_f32);
+      r_.neon_f32 = vfmaq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);
     #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
       r_.neon_f32 = vmlaq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);
     #elif defined(SIMDE_X86_FMA_NATIVE)
@@ -458,7 +458,7 @@ simde_wasm_f32x4_relaxed_nmadd (simde_v128_t a, simde_v128_t b, simde_v128_t c)
     #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
       r_.altivec_f32 = vec_nmsub(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32);
     #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
-      r_.neon_f32 = vfmsq_f32(a_.neon_f32, b_.neon_f32, c_.neon_f32);
+      r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);
     #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
       r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);
     #elif defined(SIMDE_X86_FMA_NATIVE)
@@ -498,7 +498,7 @@ simde_wasm_f64x2_relaxed_nmadd (simde_v128_t a, simde_v128_t b, simde_v128_t c)
     #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
       r_.altivec_f64 = vec_nmsub(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64);
     #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
-      r_.neon_f64 = vfmsq_f64(a_.neon_f64, b_.neon_f64, c_.neon_f64);
+      r_.neon_f64 = vfmsq_f64(c_.neon_f64, a_.neon_f64, b_.neon_f64);
     #elif defined(SIMDE_X86_FMA_NATIVE)
       r_.sse_m128d = _mm_fnmadd_pd(a_.sse_m128d, b_.sse_m128d, c_.sse_m128d);
     #elif defined(SIMDE_MIPS_MSA_NATIVE)
@@ -519,6 +519,88 @@ simde_wasm_f64x2_relaxed_nmadd (simde_v128_t a, simde_v128_t b, simde_v128_t c)
   #define wasm_f64x2_relaxed_nmadd(a, b, c) simde_wasm_f64x2_relaxed_nmadd((a), (b), (c))
 #endif
 
+/* min/max */
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_v128_t
+simde_wasm_f32x4_relaxed_min (simde_v128_t a, simde_v128_t b) {
+  #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE)
+    return wasm_f32x4_relaxed_min(a, b);
+  #elif defined(SIMDE_X86_SSE_NATIVE)
+    return simde_v128_from_m128(_mm_min_ps(simde_v128_to_m128(a), simde_v128_to_m128(b)));
+  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return simde_v128_from_neon_f32(vminq_f32(simde_v128_to_neon_f32(a), simde_v128_to_neon_f32(b)));
+  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+    return simde_v128_from_altivec_f32(vec_min(simde_v128_to_altivec_f32(a), simde_v128_to_altivec_f32(b)));
+  #elif defined(SIMDE_MIPS_MSA_NATIVE)
+    return simde_v128_from_msa_v4f32(__msa_fmin_w(simde_v128_to_msa_v4f32(a), simde_v128_to_msa_v4f32(b)));
+  #else
+    return simde_wasm_f32x4_min(a, b);
+  #endif
+}
+#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES)
+  #define wasm_f32x4_relaxed_min(a, b) simde_wasm_f32x4_relaxed_min((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_v128_t
+simde_wasm_f32x4_relaxed_max (simde_v128_t a, simde_v128_t b) {
+  #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE)
+    return wasm_f32x4_relaxed_max(a, b);
+  #elif defined(SIMDE_X86_SSE_NATIVE)
+    return simde_v128_from_m128(_mm_max_ps(simde_v128_to_m128(a), simde_v128_to_m128(b)));
+  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return simde_v128_from_neon_f32(vmaxq_f32(simde_v128_to_neon_f32(a), simde_v128_to_neon_f32(b)));
+  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+    return simde_v128_from_altivec_f32(vec_max(simde_v128_to_altivec_f32(a), simde_v128_to_altivec_f32(b)));
+  #elif defined(SIMDE_MIPS_MSA_NATIVE)
+    return simde_v128_from_msa_v4f32(__msa_fmax_w(simde_v128_to_msa_v4f32(a), simde_v128_to_msa_v4f32(b)));
+  #else
+    return simde_wasm_f32x4_max(a, b);
+  #endif
+}
+#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES)
+  #define wasm_f32x4_relaxed_max(a, b) simde_wasm_f32x4_relaxed_max((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_v128_t
+simde_wasm_f64x2_relaxed_min (simde_v128_t a, simde_v128_t b) {
+  #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE)
+    return wasm_f64x2_relaxed_min(a, b);
+  #elif defined(SIMDE_X86_SSE2_NATIVE)
+    return simde_v128_from_m128d(_mm_min_pd(simde_v128_to_m128d(a), simde_v128_to_m128d(b)));
+  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return simde_v128_from_neon_f64(vminq_f64(simde_v128_to_neon_f64(a), simde_v128_to_neon_f64(b)));
+  #elif defined(SIMDE_MIPS_MSA_NATIVE)
+    return simde_v128_from_msa_v2f64(__msa_fmin_d(simde_v128_to_msa_v2f64(a), simde_v128_to_msa_v2f64(b)));
+  #else
+    return simde_wasm_f64x2_min(a, b);
+  #endif
+}
+#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES)
+  #define wasm_f64x2_relaxed_min(a, b) simde_wasm_f64x2_relaxed_min((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_v128_t
+simde_wasm_f64x2_relaxed_max (simde_v128_t a, simde_v128_t b) {
+  #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE)
+    return wasm_f64x2_relaxed_max(a, b);
+  #elif defined(SIMDE_X86_SSE2_NATIVE)
+    return simde_v128_from_m128d(_mm_max_pd(simde_v128_to_m128d(a), simde_v128_to_m128d(b)));
+  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    return simde_v128_from_neon_f64(vmaxq_f64(simde_v128_to_neon_f64(a), simde_v128_to_neon_f64(b)));
+  #elif defined(SIMDE_MIPS_MSA_NATIVE)
+    return simde_v128_from_msa_v2f64(__msa_fmax_d(simde_v128_to_msa_v2f64(a), simde_v128_to_msa_v2f64(b)));
+  #else
+    return simde_wasm_f64x2_max(a, b);
+  #endif
+}
+#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES)
+  #define wasm_f64x2_relaxed_max(a, b) simde_wasm_f64x2_relaxed_max((a), (b))
+#endif
+
 SIMDE_END_DECLS_
 
 HEDLEY_DIAGNOSTIC_POP
diff --git a/simde/wasm/simd128.h b/simde/wasm/simd128.h
index a7c06a93f..f006df35e 100644
--- a/simde/wasm/simd128.h
+++ b/simde/wasm/simd128.h
@@ -177,8 +177,10 @@ SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(simde_v128_private, simde_v128_
     : (((x) == 0) && ((y) == 0)) ? (simde_math_signbit(x) ? (y) : (x)) \
     : ((x) > (y) ? (x) : (y)))
 
-#if defined(SIMDE_X86_SSE2_NATIVE)
+#if defined(SIMDE_X86_SSE_NATIVE)
   SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128 , simde_v128_t, simde_v128_to_m128 , simde_v128_from_m128 )
+#endif
+#if defined(SIMDE_X86_SSE2_NATIVE)
   SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128i, simde_v128_t, simde_v128_to_m128i, simde_v128_from_m128i)
   SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128d, simde_v128_t, simde_v128_to_m128d, simde_v128_from_m128d)
 #endif
@@ -6061,7 +6063,7 @@ simde_wasm_f32x4_pmax (simde_v128_t a, simde_v128_t b) {
       b_ = simde_v128_to_private(b),
       r_;
 
-    #if defined(SIMDE_X86_SSE2_NATIVE)
+    #if defined(SIMDE_X86_SSE_NATIVE)
       r_.sse_m128 = _mm_max_ps(b_.sse_m128, a_.sse_m128);
     #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
       r_.neon_f32 = vbslq_f32(vcltq_f32(a_.neon_f32, b_.neon_f32), b_.neon_f32, a_.neon_f32);
diff --git a/test/wasm/relaxed-simd/max.c b/test/wasm/relaxed-simd/max.c
new file mode 100644
index 000000000..d02c4f3e1
--- /dev/null
+++ b/test/wasm/relaxed-simd/max.c
@@ -0,0 +1,146 @@
+/* Copyright (c) 2021 Evan Nemerson
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense,
and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define SIMDE_TEST_WASM_RELAXED_SIMD_INSN max
+#include "../../../simde/wasm/relaxed-simd.h"
+#include "test-relaxed-simd.h"
+
+static int
+test_simde_wasm_f32x4_relaxed_max(SIMDE_MUNIT_TEST_ARGS) {
+  #if 1
+    SIMDE_TEST_STRUCT_MODIFIERS struct {
+      simde_float32 a[sizeof(simde_v128_t) / sizeof(simde_float32)];
+      simde_float32 b[sizeof(simde_v128_t) / sizeof(simde_float32)];
+      simde_float32 r[sizeof(simde_v128_t) / sizeof(simde_float32)];
+    } test_vec[] = {
+      { { SIMDE_FLOAT32_C( -920.07), SIMDE_FLOAT32_C( 830.32), SIMDE_FLOAT32_C( -196.85), SIMDE_FLOAT32_C( 291.81) },
+        { SIMDE_FLOAT32_C( -289.04), SIMDE_FLOAT32_C( 49.93), SIMDE_FLOAT32_C( -928.55), SIMDE_FLOAT32_C( 69.77) },
+        { SIMDE_FLOAT32_C( -289.04), SIMDE_FLOAT32_C( 830.32), SIMDE_FLOAT32_C( -196.85), SIMDE_FLOAT32_C( 291.81) } },
+      { { SIMDE_FLOAT32_C( -679.01), SIMDE_FLOAT32_C( 273.37), SIMDE_FLOAT32_C( -532.77), SIMDE_FLOAT32_C( 127.55) },
+        { SIMDE_FLOAT32_C( 100.92), SIMDE_FLOAT32_C( -536.72), SIMDE_FLOAT32_C( -535.51), SIMDE_FLOAT32_C( -444.83) },
+        { SIMDE_FLOAT32_C( 100.92), SIMDE_FLOAT32_C( 273.37), SIMDE_FLOAT32_C( -532.77), SIMDE_FLOAT32_C( 127.55) } },
+      { { SIMDE_FLOAT32_C( -370.35), SIMDE_FLOAT32_C( 910.66), SIMDE_FLOAT32_C( -449.76), SIMDE_FLOAT32_C( 756.97) },
+        { SIMDE_FLOAT32_C( -168.50), SIMDE_FLOAT32_C( 242.88), SIMDE_FLOAT32_C( -696.72), SIMDE_FLOAT32_C( 663.79) },
+        { SIMDE_FLOAT32_C( -168.50), SIMDE_FLOAT32_C( 910.66), SIMDE_FLOAT32_C( -449.76), SIMDE_FLOAT32_C( 756.97) } },
+      { { SIMDE_FLOAT32_C( -777.21), SIMDE_FLOAT32_C( -561.66), SIMDE_FLOAT32_C( 225.28), SIMDE_FLOAT32_C( 30.10) },
+        { SIMDE_FLOAT32_C( -535.78), SIMDE_FLOAT32_C( -546.75), SIMDE_FLOAT32_C( 345.48), SIMDE_FLOAT32_C( 708.52) },
+        { SIMDE_FLOAT32_C( -535.78), SIMDE_FLOAT32_C( -546.75), SIMDE_FLOAT32_C( 345.48), SIMDE_FLOAT32_C( 708.52) } },
+      { { SIMDE_FLOAT32_C( 940.56), SIMDE_FLOAT32_C( 303.87), SIMDE_FLOAT32_C( 810.53), SIMDE_FLOAT32_C( -526.94) },
+        { SIMDE_FLOAT32_C( 611.43), SIMDE_FLOAT32_C( 797.37), SIMDE_FLOAT32_C( 432.36), SIMDE_FLOAT32_C( 709.89) },
+        { SIMDE_FLOAT32_C( 940.56), SIMDE_FLOAT32_C( 797.37), SIMDE_FLOAT32_C( 810.53), SIMDE_FLOAT32_C( 709.89) } },
+      { { SIMDE_FLOAT32_C( -157.18), SIMDE_FLOAT32_C( -402.78), SIMDE_FLOAT32_C( -362.67), SIMDE_FLOAT32_C( -944.01) },
+        { SIMDE_FLOAT32_C( 106.70), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 277.04), SIMDE_FLOAT32_C( 374.11) },
+        { SIMDE_FLOAT32_C( 106.70), SIMDE_FLOAT32_C( -208.07), SIMDE_FLOAT32_C( 277.04), SIMDE_FLOAT32_C( 374.11) } },
+      { { SIMDE_FLOAT32_C( -402.16), SIMDE_FLOAT32_C( 227.84), SIMDE_FLOAT32_C( -493.53), SIMDE_FLOAT32_C( 869.48) },
+        { SIMDE_FLOAT32_C( 903.71), SIMDE_FLOAT32_C( -712.51), SIMDE_FLOAT32_C( -864.99), SIMDE_FLOAT32_C( -151.96) },
+        { SIMDE_FLOAT32_C( 903.71), SIMDE_FLOAT32_C( 227.84), SIMDE_FLOAT32_C( -493.53), SIMDE_FLOAT32_C( 869.48) } }
+    };
+
+    for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
+      simde_v128_t a = simde_wasm_v128_load(test_vec[i].a);
+      simde_v128_t b = simde_wasm_v128_load(test_vec[i].b);
+      simde_v128_t r = simde_wasm_f32x4_relaxed_max(a, b);
+      simde_test_wasm_f32x4_assert_equal(r, simde_wasm_v128_load(test_vec[i].r), INT_MAX);
+    }
+    return 0;
+  #else
+    fputc('\n', stdout);
+    simde_float32 inputs[8 * 2 * (sizeof(simde_v128_t) / sizeof(simde_float32))];
+    simde_test_wasm_f32x4_random_full(8, 2, inputs, -SIMDE_FLOAT32_C(1000.0), SIMDE_FLOAT32_C(1000.0), HEDLEY_STATIC_CAST(SimdeTestVecFloatType, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL));
+    for (size_t i = 0 ; i < 8 ; i++) {
+      simde_v128_t
+        a = simde_test_wasm_f32x4_random_full_extract(2, inputs, i, 0),
+        b = simde_test_wasm_f32x4_random_full_extract(2, inputs, i, 1),
+        r;
+
+      r = simde_wasm_f32x4_relaxed_max(a, b);
+
+      simde_test_wasm_f32x4_write(3, a, SIMDE_TEST_VEC_POS_FIRST);
+      simde_test_wasm_f32x4_write(3, b, SIMDE_TEST_VEC_POS_MIDDLE);
+      simde_test_wasm_f32x4_write(3, r, SIMDE_TEST_VEC_POS_LAST);
+    }
+    return 1;
+  #endif
+}
+
+static int
+test_simde_wasm_f64x2_relaxed_max(SIMDE_MUNIT_TEST_ARGS) {
+  #if 1
+    SIMDE_TEST_STRUCT_MODIFIERS struct {
+      simde_float64 a[sizeof(simde_v128_t) / sizeof(simde_float64)];
+      simde_float64 b[sizeof(simde_v128_t) / sizeof(simde_float64)];
+      simde_float64 r[sizeof(simde_v128_t) / sizeof(simde_float64)];
+    } test_vec[] = {
+      { { SIMDE_FLOAT64_C( 523.06), SIMDE_FLOAT64_C( -600.30) },
+        { SIMDE_FLOAT64_C( 212.57), SIMDE_FLOAT64_C( -725.13) },
+        { SIMDE_FLOAT64_C( 523.06), SIMDE_FLOAT64_C( -600.30) } },
+      { { SIMDE_FLOAT64_C( 630.42), SIMDE_FLOAT64_C( -507.35) },
+        { SIMDE_FLOAT64_C( 28.88), SIMDE_FLOAT64_C( -240.00) },
+        { SIMDE_FLOAT64_C( 630.42), SIMDE_FLOAT64_C( -240.00) } },
+      { { SIMDE_FLOAT64_C( 373.65), SIMDE_FLOAT64_C( 213.86) },
+        { SIMDE_FLOAT64_C( -611.53), SIMDE_FLOAT64_C( 707.90) },
+        { SIMDE_FLOAT64_C( 373.65), SIMDE_FLOAT64_C( 707.90) } },
+      { { SIMDE_FLOAT64_C( 339.86), SIMDE_FLOAT64_C( -860.05) },
+        { SIMDE_FLOAT64_C( 213.55), SIMDE_FLOAT64_C( -405.72) },
+        { SIMDE_FLOAT64_C( 339.86), SIMDE_FLOAT64_C( -405.72) } },
+      { { SIMDE_FLOAT64_C( -478.66), SIMDE_FLOAT64_C( -629.72) },
+        { SIMDE_FLOAT64_C( 841.82), SIMDE_FLOAT64_C( 793.27) },
+        { SIMDE_FLOAT64_C( 841.82), SIMDE_FLOAT64_C( 793.27) } },
+      { { SIMDE_FLOAT64_C( -262.99), SIMDE_FLOAT64_C( 802.55) },
+        { SIMDE_FLOAT64_C( -938.61), SIMDE_FLOAT64_C( 323.51) },
+        { SIMDE_FLOAT64_C( -262.99), SIMDE_FLOAT64_C( 802.55) } }
+    };
+
+    for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
+      simde_v128_t a = simde_wasm_v128_load(test_vec[i].a);
+      simde_v128_t b = simde_wasm_v128_load(test_vec[i].b);
+      simde_v128_t r = simde_wasm_f64x2_relaxed_max(a, b);
+      simde_test_wasm_f64x2_assert_equal(r, simde_wasm_v128_load(test_vec[i].r), INT_MAX);
+    }
+    return 0;
+  #else
+    fputc('\n', stdout);
+    simde_float64 inputs[8 * 2 * (sizeof(simde_v128_t) / sizeof(simde_float64))];
+    simde_test_wasm_f64x2_random_full(8, 2, inputs, -SIMDE_FLOAT64_C(1000.0), SIMDE_FLOAT64_C(1000.0), HEDLEY_STATIC_CAST(SimdeTestVecFloatType, SIMDE_TEST_VEC_FLOAT_NAN));
+    for (size_t i = 0 ; i < 8 ; i++) {
+      simde_v128_t
+        a = simde_test_wasm_f64x2_random_full_extract(2, inputs, i, 0),
+        b = simde_test_wasm_f64x2_random_full_extract(2, inputs, i, 1),
+        r;
+
+      r = simde_wasm_f64x2_relaxed_max(a, b);
+
+      simde_test_wasm_f64x2_write(3, a, SIMDE_TEST_VEC_POS_FIRST);
+      simde_test_wasm_f64x2_write(3, b, SIMDE_TEST_VEC_POS_MIDDLE);
+      simde_test_wasm_f64x2_write(3, r, SIMDE_TEST_VEC_POS_LAST);
+    }
+    return 1;
+  #endif
+}
+
+SIMDE_TEST_FUNC_LIST_BEGIN
+  SIMDE_TEST_FUNC_LIST_ENTRY(wasm_f32x4_relaxed_max)
+  SIMDE_TEST_FUNC_LIST_ENTRY(wasm_f64x2_relaxed_max)
+SIMDE_TEST_FUNC_LIST_END
+
+#include "test-relaxed-simd-footer.h"
diff --git a/test/wasm/relaxed-simd/meson.build b/test/wasm/relaxed-simd/meson.build
index 83412aec6..d46a0e477 100644
--- a/test/wasm/relaxed-simd/meson.build
+++ b/test/wasm/relaxed-simd/meson.build
@@ -1,6 +1,8 @@
 simde_test_wasm_relaxed_simd_tests = [
   'laneselect',
   'madd',
+  'max',
+  'min',
   'nmadd',
   'swizzle',
   'trunc',
diff --git a/test/wasm/relaxed-simd/min.c b/test/wasm/relaxed-simd/min.c
new file mode 100644
index 000000000..04fb6b585
--- /dev/null
+++ b/test/wasm/relaxed-simd/min.c
@@ -0,0 +1,146 @@
+/* Copyright (c) 2021 Evan
Nemerson + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define SIMDE_TEST_WASM_RELAXED_SIMD_INSN min +#include "../../../simde/wasm/relaxed-simd.h" +#include "test-relaxed-simd.h" + +static int +test_simde_wasm_f32x4_relaxed_min(SIMDE_MUNIT_TEST_ARGS) { + #if 1 + SIMDE_TEST_STRUCT_MODIFIERS struct { + simde_float32 a[sizeof(simde_v128_t) / sizeof(simde_float32)]; + simde_float32 b[sizeof(simde_v128_t) / sizeof(simde_float32)]; + simde_float32 r[sizeof(simde_v128_t) / sizeof(simde_float32)]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -384.73), SIMDE_FLOAT32_C( -820.97), SIMDE_FLOAT32_C( 258.75), SIMDE_FLOAT32_C( -90.51) }, + { SIMDE_FLOAT32_C( 635.11), SIMDE_FLOAT32_C( -21.53), SIMDE_FLOAT32_C( -197.11), SIMDE_FLOAT32_C( 959.39) }, + { SIMDE_FLOAT32_C( -384.73), SIMDE_FLOAT32_C( -820.97), SIMDE_FLOAT32_C( -197.11), SIMDE_FLOAT32_C( -90.51) } }, + { { SIMDE_FLOAT32_C( 756.93), SIMDE_FLOAT32_C( -403.83), SIMDE_FLOAT32_C( -535.40), SIMDE_FLOAT32_C( 581.16) }, + { SIMDE_FLOAT32_C( 575.10), SIMDE_FLOAT32_C( -784.62), SIMDE_FLOAT32_C( 826.27), SIMDE_FLOAT32_C( -290.44) }, + { SIMDE_FLOAT32_C( 575.10), SIMDE_FLOAT32_C( -784.62), SIMDE_FLOAT32_C( -535.40), SIMDE_FLOAT32_C( -290.44) } }, + { { SIMDE_FLOAT32_C( -580.63), SIMDE_FLOAT32_C( 400.60), SIMDE_FLOAT32_C( -825.33), SIMDE_FLOAT32_C( 395.98) }, + { SIMDE_FLOAT32_C( 903.08), SIMDE_FLOAT32_C( 523.68), SIMDE_FLOAT32_C( -270.81), SIMDE_FLOAT32_C( -996.64) }, + { SIMDE_FLOAT32_C( -580.63), SIMDE_FLOAT32_C( 400.60), SIMDE_FLOAT32_C( -825.33), SIMDE_FLOAT32_C( -996.64) } }, + { { SIMDE_FLOAT32_C( -450.06), SIMDE_FLOAT32_C( -799.96), SIMDE_FLOAT32_C( 54.34), SIMDE_FLOAT32_C( 646.36) }, + { SIMDE_FLOAT32_C( -396.89), SIMDE_FLOAT32_C( -93.21), SIMDE_FLOAT32_C( -282.13), SIMDE_FLOAT32_C( -348.89) }, + { SIMDE_FLOAT32_C( -450.06), SIMDE_FLOAT32_C( -799.96), SIMDE_FLOAT32_C( -282.13), SIMDE_FLOAT32_C( -348.89) } }, + { { SIMDE_FLOAT32_C( 449.36), SIMDE_FLOAT32_C( -932.70), SIMDE_FLOAT32_C( 504.25), SIMDE_FLOAT32_C( 630.70) }, + { SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 284.22), 
SIMDE_FLOAT32_C( -492.84), SIMDE_FLOAT32_C( -857.28) }, + { SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( -932.70), SIMDE_FLOAT32_C( -492.84), SIMDE_FLOAT32_C( -857.28) } }, + { { SIMDE_FLOAT32_C( -734.85), SIMDE_FLOAT32_C( 291.38), SIMDE_FLOAT32_C( 910.86), SIMDE_FLOAT32_C( -655.45) }, + { SIMDE_FLOAT32_C( 848.95), SIMDE_FLOAT32_C( -550.13), SIMDE_FLOAT32_C( 91.62), SIMDE_FLOAT32_C( 71.10) }, + { SIMDE_FLOAT32_C( -734.85), SIMDE_FLOAT32_C( -550.13), SIMDE_FLOAT32_C( 91.62), SIMDE_FLOAT32_C( -655.45) } }, + { { SIMDE_FLOAT32_C( 983.19), SIMDE_FLOAT32_C( 944.04), SIMDE_FLOAT32_C( -663.05), SIMDE_FLOAT32_C( 678.96) }, + { SIMDE_FLOAT32_C( -656.24), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( 677.11), SIMDE_FLOAT32_C( -741.86) }, + { SIMDE_FLOAT32_C( -656.24), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -663.05), SIMDE_FLOAT32_C( -741.86) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_v128_t a = simde_wasm_v128_load(test_vec[i].a); + simde_v128_t b = simde_wasm_v128_load(test_vec[i].b); + simde_v128_t r = simde_wasm_f32x4_relaxed_min(a, b); + simde_test_wasm_f32x4_assert_equal(r, simde_wasm_v128_load(test_vec[i].r), INT_MAX); + } + return 0; + #else + fputc('\n', stdout); + simde_float32 inputs[8 * 2 * (sizeof(simde_v128_t) / sizeof(simde_float32))]; + simde_test_wasm_f32x4_random_full(8, 2, inputs, -SIMDE_FLOAT32_C(1000.0), SIMDE_FLOAT32_C(1000.0), HEDLEY_STATIC_CAST(SimdeTestVecFloatType, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL)); + for (size_t i = 0 ; i < 8 ; i++) { + simde_v128_t + a = simde_test_wasm_f32x4_random_full_extract(2, inputs, i, 0), + b = simde_test_wasm_f32x4_random_full_extract(2, inputs, i, 1), + r; + + r = simde_wasm_f32x4_relaxed_min(a, b); + + simde_test_wasm_f32x4_write(3, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_wasm_f32x4_write(3, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_wasm_f32x4_write(3, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; + #endif +} + +static int 
+test_simde_wasm_f64x2_relaxed_min(SIMDE_MUNIT_TEST_ARGS) { + #if 1 + SIMDE_TEST_STRUCT_MODIFIERS struct { + simde_float64 a[sizeof(simde_v128_t) / sizeof(simde_float64)]; + simde_float64 b[sizeof(simde_v128_t) / sizeof(simde_float64)]; + simde_float64 r[sizeof(simde_v128_t) / sizeof(simde_float64)]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -79.92), SIMDE_FLOAT64_C( 907.35) }, + { SIMDE_FLOAT64_C( -343.94), SIMDE_FLOAT64_C( 358.40) }, + { SIMDE_FLOAT64_C( -343.94), SIMDE_FLOAT64_C( 358.40) } }, + { { SIMDE_FLOAT64_C( 906.87), SIMDE_FLOAT64_C( 964.34) }, + { SIMDE_FLOAT64_C( 672.12), SIMDE_FLOAT64_C( 69.90) }, + { SIMDE_FLOAT64_C( 672.12), SIMDE_FLOAT64_C( 69.90) } }, + { { SIMDE_FLOAT64_C( 891.24), SIMDE_FLOAT64_C( 43.61) }, + { SIMDE_FLOAT64_C( 586.69), SIMDE_FLOAT64_C( 866.82) }, + { SIMDE_FLOAT64_C( 586.69), SIMDE_FLOAT64_C( 43.61) } }, + { { SIMDE_FLOAT64_C( 147.11), SIMDE_FLOAT64_C( -721.67) }, + { SIMDE_FLOAT64_C( 40.13), SIMDE_FLOAT64_C( 931.62) }, + { SIMDE_FLOAT64_C( 40.13), SIMDE_FLOAT64_C( -721.67) } }, + { { SIMDE_FLOAT64_C( 114.93), SIMDE_FLOAT64_C( -695.95) }, + { SIMDE_FLOAT64_C( -532.17), SIMDE_FLOAT64_C( -145.28) }, + { SIMDE_FLOAT64_C( -532.17), SIMDE_FLOAT64_C( -695.95) } }, + { { SIMDE_FLOAT64_C( 230.71), SIMDE_FLOAT64_C( -792.59) }, + { SIMDE_FLOAT64_C( 541.02), SIMDE_FLOAT64_C( 155.01) }, + { SIMDE_FLOAT64_C( 230.71), SIMDE_FLOAT64_C( -792.59) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_v128_t a = simde_wasm_v128_load(test_vec[i].a); + simde_v128_t b = simde_wasm_v128_load(test_vec[i].b); + simde_v128_t r = simde_wasm_f64x2_relaxed_min(a, b); + simde_test_wasm_f64x2_assert_equal(r, simde_wasm_v128_load(test_vec[i].r), INT_MAX); + } + return 0; + #else + fputc('\n', stdout); + simde_float64 inputs[8 * 2 * (sizeof(simde_v128_t) / sizeof(simde_float64))]; + simde_test_wasm_f64x2_random_full(8, 2, inputs, -SIMDE_FLOAT64_C(1000.0), SIMDE_FLOAT64_C(1000.0), HEDLEY_STATIC_CAST(SimdeTestVecFloatType, 
SIMDE_TEST_VEC_FLOAT_NAN)); + for (size_t i = 0 ; i < 8 ; i++) { + simde_v128_t + a = simde_test_wasm_f64x2_random_full_extract(2, inputs, i, 0), + b = simde_test_wasm_f64x2_random_full_extract(2, inputs, i, 1), + r; + + r = simde_wasm_f64x2_relaxed_min(a, b); + + simde_test_wasm_f64x2_write(3, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_wasm_f64x2_write(3, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_wasm_f64x2_write(3, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; + #endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN + SIMDE_TEST_FUNC_LIST_ENTRY(wasm_f32x4_relaxed_min) + SIMDE_TEST_FUNC_LIST_ENTRY(wasm_f64x2_relaxed_min) +SIMDE_TEST_FUNC_LIST_END + +#include "test-relaxed-simd-footer.h"